[lld] f27e454 - [lld-macho] Implement ICF
Greg McGary via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 17 10:09:24 PDT 2021
Author: Greg McGary
Date: 2021-06-17T10:07:44-07:00
New Revision: f27e4548fc42876f66dac260ca3b6df0d5fd5fd6
URL: https://github.com/llvm/llvm-project/commit/f27e4548fc42876f66dac260ca3b6df0d5fd5fd6
DIFF: https://github.com/llvm/llvm-project/commit/f27e4548fc42876f66dac260ca3b6df0d5fd5fd6.diff
LOG: [lld-macho] Implement ICF
ICF = Identical C(ode|OMDAT) Folding
This is the LLD ELF/COFF algorithm, adapted for MachO. So far, only `-icf all` is supported. In order to support `-icf safe`, we will need to port address-significance tables (`.addrsig` directives) to MachO, which will come in later diffs.
`check-{llvm,clang,lld}` have 0 regressions for `lld -icf all` vs. baseline ld64.
We only run ICF on `__TEXT,__text` for reasons explained in the block comment in `ConcatOutputSection.cpp`.
Here is the perf impact for linking `chromium_framework` on a Mac Pro (16-core Xeon W) for the non-ICF case vs. pre-ICF:
```
N Min Max Median Avg Stddev
x 20 4.27 4.44 4.34 4.349 0.043029977
+ 20 4.37 4.46 4.405 4.4115 0.025188761
Difference at 95.0% confidence
0.0625 +/- 0.0225658
1.43711% +/- 0.518873%
(Student's t, pooled s = 0.0352566)
```
Reviewed By: #lld-macho, int3
Differential Revision: https://reviews.llvm.org/D103292
Added:
lld/MachO/ICF.cpp
lld/MachO/ICF.h
lld/test/MachO/icf-options.s
lld/test/MachO/icf-scale.s
lld/test/MachO/icf.s
Modified:
lld/MachO/CMakeLists.txt
lld/MachO/ConcatOutputSection.cpp
lld/MachO/ConcatOutputSection.h
lld/MachO/Config.h
lld/MachO/Driver.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputSection.cpp
lld/MachO/InputSection.h
lld/MachO/Options.td
lld/MachO/Symbols.cpp
lld/MachO/SyntheticSections.cpp
lld/MachO/UnwindInfoSection.cpp
lld/MachO/UnwindInfoSection.h
lld/MachO/Writer.cpp
lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
Removed:
################################################################################
diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 61ac878ffbb7c..eff1812a6bb22 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -15,6 +15,7 @@ add_lld_library(lldMachO2
DriverUtils.cpp
Dwarf.cpp
ExportTrie.cpp
+ ICF.cpp
InputFiles.cpp
InputSection.cpp
LTO.cpp
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 7926503cfee57..232ab5ce39fd4 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -17,8 +17,7 @@
#include "lld/Common/Memory.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/ScopedPrinter.h"
-
-#include <algorithm>
+#include "llvm/Support/TimeProfiler.h"
using namespace llvm;
using namespace llvm::MachO;
@@ -357,3 +356,12 @@ void ConcatOutputSection::mergeFlags(InputSection *input) {
flags |= input->flags;
flags &= pureMask;
}
+
+// Drops every input section that reports shouldOmitFromOutput(); the
+// sections that stay keep their relative order.
+void ConcatOutputSection::eraseOmittedInputSections() {
+  const auto isOmitted = [](const ConcatInputSection *section) -> bool {
+    return section->shouldOmitFromOutput();
+  };
+  auto firstOmitted = std::remove_if(inputs.begin(), inputs.end(), isOmitted);
+  inputs.erase(firstOmitted, inputs.end());
+}
diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index e26869408dfd3..dad87e3388b2d 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -40,6 +40,7 @@ class ConcatOutputSection final : public OutputSection {
void finalize() override;
bool needsThunks() const;
uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+ void eraseOmittedInputSections();
void writeTo(uint8_t *buf) const override;
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 2f7611406bc03..722b19f712531 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -57,6 +57,13 @@ enum class UndefinedSymbolTreatment {
dynamic_lookup,
};
+enum class ICFLevel {
+ unknown, // -icf value was not recognized; getICFLevel() warns and uses none
+ none, // default for Configuration::icfLevel; the folding pass returns early
+ safe, // accepted on the command line, not yet implemented (downgraded to none)
+ all, // fold identical sections in __TEXT,__text
+};
+
struct SectionAlign {
llvm::StringRef segName;
llvm::StringRef sectName;
@@ -126,6 +133,7 @@ struct Configuration {
NamespaceKind namespaceKind = NamespaceKind::twolevel;
UndefinedSymbolTreatment undefinedSymbolTreatment =
UndefinedSymbolTreatment::error;
+ ICFLevel icfLevel = ICFLevel::none;
llvm::MachO::HeaderFileType outputType;
std::vector<llvm::StringRef> systemLibraryRoots;
std::vector<llvm::StringRef> librarySearchPaths;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 8217cc583eac6..d2d271271349f 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -698,6 +698,29 @@ getUndefinedSymbolTreatment(const ArgList &args) {
return treatment;
}
+// Derives the ICF level from the command line. A request that cannot be
+// honored (unrecognized value, conflict with -no_deduplicate, or the
+// not-yet-implemented `safe') produces a warning and yields ICFLevel::none.
+static ICFLevel getICFLevel(const ArgList &args) {
+  const bool dedupDisabled = args.hasArg(OPT_no_deduplicate);
+  const StringRef requested = args.getLastArgValue(OPT_icf);
+  const ICFLevel parsed = StringSwitch<ICFLevel>(requested)
+                              .Cases("none", "", ICFLevel::none)
+                              .Case("safe", ICFLevel::safe)
+                              .Case("all", ICFLevel::all)
+                              .Default(ICFLevel::unknown);
+
+  if (parsed == ICFLevel::unknown) {
+    warn(Twine("unknown -icf OPTION `") + requested +
+         "', defaulting to `none'");
+    return ICFLevel::none;
+  }
+  if (parsed == ICFLevel::none)
+    return ICFLevel::none;
+
+  // From here on the request is either `safe' or `all'.
+  if (dedupDisabled) {
+    warn(Twine("`-icf " + requested +
+               "' conflicts with -no_deduplicate, setting to `none'"));
+    return ICFLevel::none;
+  }
+  if (parsed == ICFLevel::safe) {
+    warn(Twine("`-icf safe' is not yet implemented, reverting to `none'"));
+    return ICFLevel::none;
+  }
+  return parsed;
+}
+
static void warnIfDeprecatedOption(const Option &opt) {
if (!opt.getGroup().isValid())
return;
@@ -1096,6 +1119,8 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
config->undefinedSymbolTreatment = getUndefinedSymbolTreatment(args);
+ config->icfLevel = getICFLevel(args);
+
if (config->outputType == MH_EXECUTE)
config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"),
/*file=*/nullptr,
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
new file mode 100644
index 0000000000000..fe724799996d9
--- /dev/null
+++ b/lld/MachO/ICF.cpp
@@ -0,0 +1,257 @@
+//===- ICF.cpp ------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ICF.h"
+#include "ConcatOutputSection.h"
+#include "InputSection.h"
+#include "Symbols.h"
+#include "llvm/Support/Parallel.h"
+
+#include <atomic>
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::macho;
+
+// Snapshots INPUTS into icfInputs, so that reordering done by ICF happens on
+// its own copy rather than on the caller's vector.
+ICF::ICF(std::vector<ConcatInputSection *> &inputs)
+    : icfInputs(inputs.begin(), inputs.end()) {}
+
+// ICF = Identical Code Folding
+//
+// We only fold __TEXT,__text, so this is really "code" folding, and not
+// "COMDAT" folding. String and scalar constant literals are deduplicated
+// elsewhere.
+//
+// Summary of segments & sections:
+//
+// Since folding never occurs across output-section boundaries,
+// ConcatOutputSection is the natural input for ICF.
+//
+// The __TEXT segment is readonly at the MMU. Some sections are already
+// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
+// synthetic and inherently free of duplicates (__TEXT,__stubs &
+// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF
+// could work on __TEXT,__const, but doing so induces many test failures.
+//
+// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
+// thus ineligible for ICF.
+//
+// The __DATA_CONST segment is read/write at the MMU, but is logically const to
+// the application after dyld applies fixups to pointer data. Some sections are
+// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic
+// (__DATA_CONST,__got). There are no ICF opportunities here.
+//
+// The __DATA segment is read/write at the MMU, and as application-writeable
+// data, none of its sections are eligible for ICF.
+//
+// Please see the large block comment in lld/ELF/ICF.cpp for an explanation
+// of the segregation algorithm.
+//
+// FIXME(gkm): implement keep-unique attributes
+// FIXME(gkm): implement address-significance tables for MachO object files
+
+static unsigned icfPass = 0; // bumped after each forEachClass(); icfPass % 2 picks the icfEqClass slot being read
+static std::atomic<bool> icfRepeat{false}; // set by segregate() when it split a class; run() loops while it is set
+
+// Returns true when the two sections agree on everything that does not
+// depend on what their relocations point at: contents, flags, and the shape
+// of every relocation (type, pcrel, length, offset, addend, referent kind).
+static bool equalsConstant(const ConcatInputSection *ia,
+                           const ConcatInputSection *ib) {
+  const bool sameBody = ia->data.size() == ib->data.size() &&
+                        ia->data == ib->data && ia->flags == ib->flags &&
+                        ia->relocs.size() == ib->relocs.size();
+  if (!sameBody)
+    return false;
+
+  const auto sameShape = [](const Reloc &lhs, const Reloc &rhs) {
+    return lhs.type == rhs.type && lhs.pcrel == rhs.pcrel &&
+           lhs.length == rhs.length && lhs.offset == rhs.offset &&
+           lhs.addend == rhs.addend &&
+           lhs.referent.is<Symbol *>() == rhs.referent.is<Symbol *>();
+  };
+  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
+                    sameShape);
+}
+
+// Compare only the relocation referents.
+//
+// Both sections must carry the same number of relocations (asserted below).
+// A pair of referents matches when it is the same object, or when both are
+// Defined symbols / input sections that currently sit in the same
+// equivalence class (slot icfPass % 2 of icfEqClass).
+static bool equalsVariable(const ConcatInputSection *ia,
+                           const ConcatInputSection *ib) {
+  assert(ia->relocs.size() == ib->relocs.size());
+  auto f = [&](const Reloc &ra, const Reloc &rb) {
+    if (ra.referent == rb.referent)
+      return true;
+    if (ra.referent.is<Symbol *>()) {
+      const auto *sa = ra.referent.get<Symbol *>();
+      const auto *sb = rb.referent.get<Symbol *>();
+      if (sa->kind() != sb->kind())
+        return false;
+      if (isa<Defined>(sa)) {
+        // The kinds are equal, so both casts are known to succeed.
+        const auto *da = cast<Defined>(sa);
+        const auto *db = cast<Defined>(sb);
+        if (da->value != db->value)
+          return false;
+        // Compare the two symbols with each other; the previous code compared
+        // da with itself, which made this check a no-op.
+        if (da->isAbsolute() != db->isAbsolute())
+          return false;
+        // Never dereference db->isec unless both symbols have a section. A
+        // symbol with a section never matches one without.
+        if ((da->isec == nullptr) != (db->isec == nullptr))
+          return false;
+        if (da->isec)
+          if (da->isec->icfEqClass[icfPass % 2] !=
+              db->isec->icfEqClass[icfPass % 2])
+            return false;
+      } else if (isa<DylibSymbol>(sa)) {
+        // There is one DylibSymbol per gotIndex and we already checked for
+        // symbol equality, thus we know that these must be different.
+        return false;
+      } else {
+        llvm_unreachable("equalsVariable symbol kind");
+      }
+    } else {
+      const auto *sa = ra.referent.get<InputSection *>();
+      const auto *sb = rb.referent.get<InputSection *>();
+      if (sa->icfEqClass[icfPass % 2] != sb->icfEqClass[icfPass % 2])
+        return false;
+    }
+    return true;
+  };
+  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
+                    f);
+}
+
+// Find the first InputSection after BEGIN whose equivalence class differs
+// from that of icfInputs[BEGIN]. Returns END when no such section exists.
+size_t ICF::findBoundary(size_t begin, size_t end) {
+  const unsigned slot = icfPass % 2;
+  const uint64_t classId = icfInputs[begin]->icfEqClass[slot];
+  size_t pos = begin + 1;
+  while (pos < end) {
+    if (icfInputs[pos]->icfEqClass[slot] != classId)
+      return pos;
+    ++pos;
+  }
+  return end;
+}
+
+// Invoke FUNC once for each maximal subrange of [begin, end) whose sections
+// share an equivalence class, walking the range front to back.
+void ICF::forEachClassRange(size_t begin, size_t end,
+                            std::function<void(size_t, size_t)> func) {
+  for (size_t first = begin; first < end;) {
+    const size_t last = findBoundary(first, end);
+    func(first, last);
+    first = last;
+  }
+}
+
+// Invoke FUNC on every same-class subrange of icfInputs, then advance
+// icfPass. Small inputs are walked sequentially; larger ones are split into
+// shards whose subranges are processed in parallel.
+void ICF::forEachClass(std::function<void(size_t, size_t)> func) {
+  const size_t numInputs = icfInputs.size();
+  // Only use threads when the benefits outweigh the overhead.
+  const size_t threadingThreshold = 1024;
+  if (numInputs >= threadingThreshold) {
+    // Shard into non-overlapping intervals, and call FUNC in parallel. The
+    // sharding must be completed before any calls to FUNC are made so that
+    // FUNC can modify the InputSection in its shard without causing data
+    // races.
+    const size_t shards = 256;
+    const size_t step = numInputs / shards;
+    size_t boundaries[shards + 1];
+    boundaries[0] = 0;
+    boundaries[shards] = numInputs;
+    parallelForEachN(1, shards, [&](size_t shard) {
+      boundaries[shard] = findBoundary((shard - 1) * step, numInputs);
+    });
+    parallelForEachN(1, shards + 1, [&](size_t shard) {
+      const size_t lo = boundaries[shard - 1];
+      const size_t hi = boundaries[shard];
+      if (lo < hi)
+        forEachClassRange(lo, hi, func);
+    });
+  } else {
+    forEachClassRange(0, numInputs, func);
+  }
+  ++icfPass;
+}
+
+// Drives the pass: mixes referent hashes into each section's hash, sorts the
+// sections by that hash, splits the resulting groups until they stop
+// changing, and finally folds every surviving group onto its first member.
+void ICF::run() {
+  // Into each origin-section hash, combine all reloc referent section hashes.
+  for (icfPass = 0; icfPass < 2; ++icfPass) {
+    const unsigned from = icfPass % 2;
+    const unsigned to = (icfPass + 1) % 2;
+    parallelForEach(icfInputs, [&](InputSection *isec) {
+      uint64_t hash = isec->icfEqClass[from];
+      for (const Reloc &r : isec->relocs) {
+        auto *sym = r.referent.dyn_cast<Symbol *>();
+        if (!sym)
+          continue;
+        if (auto *dylibSym = dyn_cast<DylibSymbol>(sym)) {
+          hash += dylibSym->stubsHelperIndex;
+        } else if (auto *defined = dyn_cast<Defined>(sym)) {
+          const uint64_t referentClass =
+              defined->isec ? defined->isec->icfEqClass[from] : 0;
+          hash += defined->value + referentClass;
+        } else {
+          llvm_unreachable("foldIdenticalSections symbol kind");
+        }
+      }
+      // Set MSB to 1 to avoid collisions with non-hashed classes.
+      isec->icfEqClass[to] = hash | (1ull << 63);
+    });
+  }
+
+  const auto byInitialClass = [](const InputSection *a,
+                                 const InputSection *b) {
+    return a->icfEqClass[0] < b->icfEqClass[0];
+  };
+  llvm::stable_sort(icfInputs, byInitialClass);
+  forEachClass([&](size_t first, size_t last) {
+    segregate(first, last, equalsConstant);
+  });
+
+  // Split equivalence groups by comparing relocations until convergence
+  do {
+    icfRepeat = false;
+    forEachClass([&](size_t first, size_t last) {
+      segregate(first, last, equalsVariable);
+    });
+  } while (icfRepeat);
+  log("ICF needed " + Twine(icfPass) + " iterations");
+
+  // Fold sections within equivalence classes
+  forEachClass([&](size_t first, size_t last) {
+    // Singleton classes have nothing to fold.
+    if (last - first < 2)
+      return;
+    ConcatInputSection *keeper = icfInputs[first];
+    for (size_t idx = first + 1; idx != last; ++idx)
+      keeper->foldIdentical(icfInputs[idx]);
+  });
+}
+
+// Split an equivalence class into smaller classes: [begin, end) is
+// repeatedly partitioned into the sections that EQUALS reports as matching
+// the current head, followed by the rest.
+void ICF::segregate(
+    size_t begin, size_t end,
+    std::function<bool(const ConcatInputSection *, const ConcatInputSection *)>
+        equals) {
+  const unsigned nextSlot = (icfPass + 1) % 2;
+  for (size_t head = begin; head < end;) {
+    // Divide [head, end) into two. Let mid be the start index of the
+    // second group. The head itself is outside the partitioned range, so it
+    // never moves.
+    ConcatInputSection *leader = icfInputs[head];
+    const auto matchesLeader = [&](ConcatInputSection *candidate) {
+      return equals(leader, candidate);
+    };
+    auto bound = std::stable_partition(icfInputs.begin() + head + 1,
+                                       icfInputs.begin() + end, matchesLeader);
+    const size_t mid = bound - icfInputs.begin();
+
+    // Split [head, end) into [head, mid) and [mid, end). We use mid as an
+    // equivalence class ID because every group ends with a unique index.
+    for (size_t idx = head; idx < mid; ++idx)
+      icfInputs[idx]->icfEqClass[nextSlot] = mid;
+
+    // If we created a group, we need to iterate the main loop again.
+    if (mid != end)
+      icfRepeat = true;
+
+    head = mid;
+  }
+}
diff --git a/lld/MachO/ICF.h b/lld/MachO/ICF.h
new file mode 100644
index 0000000000000..767630f0d7eb1
--- /dev/null
+++ b/lld/MachO/ICF.h
@@ -0,0 +1,42 @@
+//===- ICF.h ----------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_ICF_H
+#define LLD_MACHO_ICF_H
+
+#include "lld/Common/LLVM.h"
+#include <functional>
+#include <vector>
+
+namespace lld {
+namespace macho {
+
+class ConcatInputSection;
+
+// Identical Code Folding over one list of ConcatInputSections. Construct it
+// from the sections to consider, then call run().
+class ICF {
+public:
+  // Copies INPUTS into icfInputs. Marked explicit so that a section vector is
+  // never converted into an ICF object by accident.
+  explicit ICF(std::vector<ConcatInputSection *> &inputs);
+
+  // Runs the whole pass: hashing, segregation into equivalence classes, and
+  // folding of each class onto its first member.
+  void run();
+  // Splits the range [begin, end) of icfInputs into groups whose members
+  // compare equal to the group's first section under EQUALS.
+  void segregate(size_t begin, size_t end,
+                 std::function<bool(const ConcatInputSection *,
+                                    const ConcatInputSection *)>
+                     equals);
+  // Returns the index of the first section after BEGIN whose equivalence
+  // class differs from that of icfInputs[BEGIN], or END if there is none.
+  size_t findBoundary(size_t begin, size_t end);
+  // Calls FUNC on each same-class subrange of [begin, end).
+  void forEachClassRange(size_t begin, size_t end,
+                         std::function<void(size_t, size_t)> func);
+  // Calls FUNC on each same-class subrange of icfInputs, using threads once
+  // the input is large enough.
+  void forEachClass(std::function<void(size_t, size_t)> func);
+
+  // ICF needs a copy of the inputs vector because its equivalence-class
+  // segregation algorithm destroys the proper sequence.
+  std::vector<ConcatInputSection *> icfInputs;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 2fc08ff911c2f..d8d6734c737ff 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -642,10 +642,16 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
auto *concatIsec = cast<ConcatInputSection>(isec);
auto *nextIsec = make<ConcatInputSection>(*concatIsec);
- nextIsec->data = isec->data.slice(symbolOffset);
nextIsec->numRefs = 0;
nextIsec->wasCoalesced = false;
- isec->data = isec->data.slice(0, symbolOffset);
+ if (isZeroFill(isec->flags)) {
+ // Zero-fill sections have NULL data.data() non-zero data.size()
+ nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
+ isec->data = {nullptr, symbolOffset};
+ } else {
+ nextIsec->data = isec->data.slice(symbolOffset);
+ isec->data = isec->data.slice(0, symbolOffset);
+ }
// By construction, the symbol will be at offset zero in the new
// subsection.
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index b56e496f15670..c656efc380249 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -12,6 +12,7 @@
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
+#include "UnwindInfoSection.h"
#include "Writer.h"
#include "lld/Common/Memory.h"
#include "llvm/Support/Endian.h"
@@ -44,6 +45,67 @@ static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) {
return sym->getVA();
}
+// ICF needs to hash any section that might potentially be duplicated so
+// that it can match on content rather than identity. Returns true when this
+// section is such a candidate; ISTEXT tells whether the caller considers it
+// part of the text output section.
+bool InputSection::isHashableForICF(bool isText) const {
+  const auto *concatIsec = dyn_cast<ConcatInputSection>(this);
+  if (concatIsec && concatIsec->shouldOmitFromOutput())
+    return false;
+
+  switch (sectionType(flags)) {
+  case S_REGULAR:
+    // Only text is hashed, and only when it has no personality.
+    // One might hope that we could hash __TEXT,__const subsections to fold
+    // references to duplicated values, but alas, many tests fail.
+    return isText && !hasPersonality;
+  case S_CSTRING_LITERALS:
+  case S_4BYTE_LITERALS:
+  case S_8BYTE_LITERALS:
+  case S_16BYTE_LITERALS:
+  case S_LITERAL_POINTERS:
+    // FIXME(gkm): once literal sections are deduplicated, their content and
+    // identity correlate, so we can assign unique IDs to them rather than hash
+    // them.
+    return true;
+  case S_ZEROFILL:
+  case S_GB_ZEROFILL:
+  case S_NON_LAZY_SYMBOL_POINTERS:
+  case S_LAZY_SYMBOL_POINTERS:
+  case S_SYMBOL_STUBS:
+  case S_MOD_INIT_FUNC_POINTERS:
+  case S_MOD_TERM_FUNC_POINTERS:
+  case S_COALESCED:
+  case S_INTERPOSING:
+  case S_DTRACE_DOF:
+  case S_LAZY_DYLIB_SYMBOL_POINTERS:
+  case S_THREAD_LOCAL_REGULAR:
+  case S_THREAD_LOCAL_ZEROFILL:
+  case S_THREAD_LOCAL_VARIABLES:
+  case S_THREAD_LOCAL_VARIABLE_POINTERS:
+  case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
+    return false;
+  default:
+    llvm_unreachable("Section type");
+  }
+}
+
+// Seeds icfEqClass[0] with a hash of this section's contents.
+void InputSection::hashForICF() {
+  assert(data.data()); // zeroFill section data has nullptr with non-zero size
+  assert(icfEqClass[0] == 0); // don't overwrite a unique ID!
+  // Turn-on the top bit to guarantee that valid hashes have no collisions
+  // with the small-integer unique IDs for ICF-ineligible sections
+  const uint64_t hashedMarker = 1ull << 63;
+  const uint64_t contentHash = xxHash64(data);
+  icfEqClass[0] = contentHash | hashedMarker;
+}
+
+// Folds REDUNDANT into this section. This section survives: it takes the
+// larger of the two alignments and absorbs REDUNDANT's reference count.
+// REDUNDANT is marked not live and coalesced, its reference count is zeroed,
+// and its `replacement' pointer is set to this section.
+//
+// The parameter is named `redundant' to match the declaration in
+// InputSection.h.
+void ConcatInputSection::foldIdentical(ConcatInputSection *redundant) {
+  align = std::max(align, redundant->align);
+  redundant->live = false;
+  redundant->wasCoalesced = true;
+  numRefs += redundant->numRefs;
+  redundant->numRefs = 0;
+  redundant->replacement = this;
+}
+
void ConcatInputSection::writeTo(uint8_t *buf) {
assert(!shouldOmitFromOutput());
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index e0d463ea30bc0..d713b6bbe0d78 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -23,6 +23,7 @@ namespace macho {
class InputFile;
class OutputSection;
+class Defined;
class InputSection {
public:
@@ -54,8 +55,20 @@ class InputSection {
uint32_t align = 1;
uint32_t flags = 0;
uint32_t callSiteCount = 0;
- bool isFinal = false; // is address assigned?
+ // is address assigned?
+ bool isFinal = false;
+
+ bool isHashableForICF(bool isText) const;
+ void hashForICF();
+ InputSection *canonical() { return replacement ? replacement : this; }
+
+ // ICF can't fold functions with LSDA+personality
+ bool hasPersonality = false;
+ // Points to the surviving section after this one is folded by ICF
+ InputSection *replacement = nullptr;
+ // Equivalence-class ID for ICF
+ uint64_t icfEqClass[2] = {0, 0};
ArrayRef<uint8_t> data;
std::vector<Reloc> relocs;
@@ -98,6 +111,8 @@ class ConcatInputSection final : public InputSection {
return isec->kind() == ConcatKind;
}
+ void foldIdentical(ConcatInputSection *redundant);
+
// With subsections_via_symbols, most symbols have their own InputSection,
// and for weak symbols (e.g. from inline functions), only the
// InputSection from one translation unit will make it to the output,
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 481d7aa6cb25d..9f9b8d301528a 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -275,6 +275,13 @@ def no_branch_islands : Flag<["-"], "no_branch_islands">,
HelpText<"Disable infra for branches beyond the maximum branch distance.">,
Flags<[HelpHidden]>,
Group<grp_opts>;
+def icf: Separate<["-"], "icf">, // level is given as the following argument, e.g. `-icf all'
+ HelpText<"Set level for identical code folding (default: none)">,
+ MetaVarName<"[none,safe,all]">,
+ Group<grp_opts>;
+// Flag form of turning code folding off; the driver warns when it is combined
+// with an -icf level other than none.
+def no_deduplicate : Flag<["-"], "no_deduplicate">,
+    HelpText<"Disable code deduplication (synonym for `-icf none')">,
+    Group<grp_opts>;
def grp_version : OptionGroup<"version">, HelpText<"VERSION TARGETING">;
@@ -601,10 +608,6 @@ def no_weak_imports : Flag<["-"], "no_weak_imports">,
HelpText<"Fail if any symbols are weak imports, allowed to be NULL at runtime">,
Flags<[HelpHidden]>,
Group<grp_rare>;
-def no_deduplicate : Flag<["-"], "no_deduplicate">,
- HelpText<"Omit the deduplication pass">,
- Flags<[HelpHidden]>,
- Group<grp_rare>;
def verbose_deduplicate : Flag<["-"], "verbose_deduplicate">,
HelpText<"Print function names eliminated by deduplication and the total size of code savings">,
Flags<[HelpHidden]>,
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 26a4a873677b8..47f30d4141fce 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -40,7 +40,9 @@ bool Symbol::isLive() const {
// no_dead_strip or live_support. In that case, the section will know
// that it's live but `used` might be false. Non-absolute symbols always
// have to use the section's `live` bit as source of truth.
- return d->isAbsolute() ? used : d->isec->isLive(d->value);
+ if (d->isAbsolute())
+ return used;
+ return d->isec->canonical()->isLive(d->value);
}
assert(!isa<CommonSymbol>(this) &&
@@ -57,7 +59,7 @@ uint64_t Defined::getVA() const {
if (isAbsolute())
return value;
- if (!isec->isFinal) {
+ if (!isec->canonical()->isFinal) {
// A target arch that does not use thunks ought never ask for
// the address of a function that has not yet been finalized.
assert(target->usesThunks());
@@ -68,7 +70,7 @@ uint64_t Defined::getVA() const {
// expedient to return a contrived out-of-range address.
return TargetInfo::outOfRangeVA;
}
- return isec->getVA(value);
+ return isec->canonical()->getVA(value);
}
uint64_t DylibSymbol::getVA() const {
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index ef84a9d7c0e8d..fec43002bb461 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -647,6 +647,9 @@ void FunctionStartsSection::finalizeContents() {
if (const auto *defined = dyn_cast<Defined>(sym)) {
if (!defined->isec || !isCodeSection(defined->isec) || !defined->isLive())
continue;
+ if (const auto *concatIsec = dyn_cast<ConcatInputSection>(defined->isec))
+ if (concatIsec->shouldOmitFromOutput())
+ continue;
// TODO: Add support for thumbs, in that case
// the lowest bit of nextAddr needs to be set to 1.
addrs.push_back(defined->getVA());
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 309e5ce64d73c..deeef18f0bb36 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -143,12 +143,18 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
// work. But since there are usually just few personality functions
// that are referenced from many places, at least some of them likely
// live, it wouldn't reduce number of got entries.
- for (Reloc &r : isec->relocs) {
+ for (size_t i = 0; i < isec->relocs.size(); ++i) {
+ Reloc &r = isec->relocs[i];
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
offsetof(CompactUnwindEntry<Ptr>, personality))
continue;
+ Reloc &rFunc = isec->relocs[++i];
+ assert(r.offset ==
+ rFunc.offset + offsetof(CompactUnwindEntry<Ptr>, personality));
+ rFunc.referent.get<InputSection *>()->hasPersonality = true;
+
if (auto *s = r.referent.dyn_cast<Symbol *>()) {
if (auto *undefined = dyn_cast<Undefined>(s)) {
treatUndefinedSymbol(*undefined);
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 4f181099a5831..7ccf7a4dfde7e 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -13,9 +13,6 @@
#include "SyntheticSections.h"
#include "mach-o/compact_unwind_encoding.h"
-#include "llvm/ADT/DenseMap.h"
-
-#include <vector>
namespace lld {
namespace macho {
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 523f5013e22fc..8df5ac1494fd3 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -9,6 +9,7 @@
#include "Writer.h"
#include "ConcatOutputSection.h"
#include "Config.h"
+#include "ICF.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "MapFile.h"
@@ -51,6 +52,7 @@ class Writer {
void scanSymbols();
template <class LP> void createOutputSections();
template <class LP> void createLoadCommands();
+ void foldIdenticalSections();
void finalizeAddresses();
void finalizeLinkEditSegment();
void assignAddresses(OutputSegment *);
@@ -76,6 +78,7 @@ class Writer {
LCUuid *uuidCommand = nullptr;
OutputSegment *linkEditSegment = nullptr;
+ DenseMap<NamePair, ConcatOutputSection *> concatOutputSections;
};
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -885,7 +888,6 @@ template <class LP> void Writer::createOutputSections() {
}
// Then add input sections to output sections.
- DenseMap<NamePair, ConcatOutputSection *> concatOutputSections;
for (const auto &p : enumerate(inputSections)) {
InputSection *isec = p.value();
OutputSection *osec;
@@ -940,6 +942,47 @@ template <class LP> void Writer::createOutputSections() {
linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit);
}
+// Runs ICF on the (possibly renamed) __TEXT,__text output section and removes
+// the input sections that were folded away. Does nothing when ICF is disabled
+// or when no such output section exists.
+void Writer::foldIdenticalSections() {
+  if (config->icfLevel == ICFLevel::none)
+    return;
+  ConcatOutputSection *textOsec = concatOutputSections.lookup(
+      maybeRenameSection({segment_names::text, section_names::text}));
+  if (!textOsec)
+    return;
+
+  TimeTraceScope timeScope("Fold Identical Code Sections");
+  // The ICF equivalence-class segregation algorithm relies on pre-computed
+  // hashes of InputSection::data for the ConcatOutputSection::inputs and all
+  // sections referenced by their relocs. We could recursively traverse the
+  // relocs to find every referenced InputSection, but that precludes easy
+  // parallelization. Therefore, we hash every InputSection here where we have
+  // them all accessible as a simple vector.
+  std::vector<InputSection *> hashable;
+  // If an InputSection is ineligible for ICF, we give it a unique ID to force
+  // it into an unfoldable singleton equivalence class. Begin the unique-ID
+  // space at inputSections.size(), so that it will never intersect with
+  // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
+  // coexist with equivalence-class IDs, this is not necessary, but might help
+  // someone keep the numbers straight in case we ever need to debug
+  // ICF::segregate().
+  uint64_t icfUniqueID = inputSections.size();
+  for (InputSection *isec : inputSections) {
+    const bool inText = isec->parent == textOsec;
+    if (!isec->isHashableForICF(inText)) {
+      isec->icfEqClass[0] = ++icfUniqueID;
+      continue;
+    }
+    hashable.push_back(isec);
+  }
+  parallelForEach(hashable, [](InputSection *isec) { isec->hashForICF(); });
+
+  // Now that every input section is either hashed or marked as unique,
+  // run the segregation algorithm to detect foldable subsections
+  ICF(textOsec->inputs).run();
+
+  const size_t oldSize = textOsec->inputs.size();
+  textOsec->eraseOmittedInputSections();
+  const size_t newSize = textOsec->inputs.size();
+  log("ICF kept " + Twine(newSize) + " removed " + Twine(oldSize - newSize) +
+      " of " + Twine(oldSize));
+}
+
void Writer::finalizeAddresses() {
TimeTraceScope timeScope("Finalize addresses");
uint64_t pageSize = target->getPageSize();
@@ -1071,6 +1114,7 @@ template <class LP> void Writer::run() {
in.stubHelper->setup();
scanSymbols();
createOutputSections<LP>();
+ foldIdenticalSections();
// After this point, we create no new segments; HOWEVER, we might
// yet create branch-range extension thunks for architectures whose
// hardware call instructions have limited range, e.g., ARM(64).
diff --git a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
index 716905997a912..f27ca44b0f861 100644
--- a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
+++ b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
@@ -68,5 +68,5 @@ parent-umbrella:
umbrella: System
exports:
- targets: [ x86_64-macos, x86_64-maccatalyst, arm64-macos ]
- symbols: [ ___nan ]
+ symbols: [ ___nan, ___isnan, ___inf, ___isinf ]
...
diff --git a/lld/test/MachO/icf-options.s b/lld/test/MachO/icf-options.s
new file mode 100644
index 0000000000000..4aca312ba81d8
--- /dev/null
+++ b/lld/test/MachO/icf-options.s
@@ -0,0 +1,65 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; mkdir %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+# RUN: %lld -lSystem -icf all -o %t/all %t/main.o 2>&1 \
+# RUN: | FileCheck %s --check-prefix=DIAG-EMPTY --allow-empty
+# RUN: %lld -lSystem -icf none -o %t/none %t/main.o 2>&1 \
+# RUN: | FileCheck %s --check-prefix=DIAG-EMPTY --allow-empty
+# RUN: %lld -lSystem -no_deduplicate -o %t/no_dedup %t/main.o 2>&1 \
+# RUN: | FileCheck %s --check-prefix=DIAG-EMPTY --allow-empty
+# RUN: not %lld -lSystem -icf safe -o %t/safe %t/main.o 2>&1 \
+# RUN: | FileCheck %s --check-prefix=DIAG-SAFE
+# RUN: not %lld -lSystem -icf junk -o %t/junk %t/main.o 2>&1 \
+# RUN: | FileCheck %s --check-prefix=DIAG-JUNK
+# RUN: not %lld -lSystem -icf all -no_deduplicate -o %t/clash %t/main.o 2>&1 \
+# RUN: | FileCheck %s --check-prefix=DIAG-CLASH
+
+# DIAG-EMPTY-NOT: {{.}}
+# DIAG-SAFE: `-icf safe' is not yet implemented, reverting to `none'
+# DIAG-JUNK: unknown -icf OPTION `junk', defaulting to `none'
+# DIAG-CLASH: `-icf all' conflicts with -no_deduplicate, setting to `none'
+
+# RUN: llvm-objdump -d --syms %t/all | FileCheck %s --check-prefix=FOLD
+# RUN: llvm-objdump -d --syms %t/none | FileCheck %s --check-prefix=NOOP
+# RUN: llvm-objdump -d --syms %t/no_dedup | FileCheck %s --check-prefix=NOOP
+
+# FOLD-LABEL: SYMBOL TABLE:
+# FOLD: [[#%x,MAIN:]] g F __TEXT,__text _main
+# FOLD: [[#%x,F:]] g F __TEXT,__text _f1
+# FOLD: [[#%x,F]] g F __TEXT,__text _f2
+
+# FOLD-LABEL: Disassembly of section __TEXT,__text:
+# FOLD: [[#%x,MAIN]] <_main>:
+# FOLD-NEXT: callq 0x[[#%x,F]] <_f2>
+# FOLD-NEXT: callq 0x[[#%x,F]] <_f2>
+
+# NOOP-LABEL: SYMBOL TABLE:
+# NOOP: [[#%x,MAIN:]] g F __TEXT,__text _main
+# NOOP: [[#%x,F1:]] g F __TEXT,__text _f1
+# NOOP: [[#%x,F2:]] g F __TEXT,__text _f2
+
+# NOOP-LABEL: Disassembly of section __TEXT,__text:
+# NOOP: [[#%x,MAIN]] <_main>:
+# NOOP-NEXT: callq 0x[[#%x,F1]] <_f1>
+# NOOP-NEXT: callq 0x[[#%x,F2]] <_f2>
+
+.subsections_via_symbols
+.text
+.p2align 2
+
+.globl _f1
+_f1:
+ movl $0, %eax
+ ret
+
+.globl _f2
+_f2:
+ movl $0, %eax
+ ret
+
+.globl _main
+_main:
+ callq _f1
+ callq _f2
+ ret
diff --git a/lld/test/MachO/icf-scale.s b/lld/test/MachO/icf-scale.s
new file mode 100644
index 0000000000000..211ea945f4ca0
--- /dev/null
+++ b/lld/test/MachO/icf-scale.s
@@ -0,0 +1,81 @@
+# REQUIRES: x86
+# RUN: rm -rf %t*
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: %lld -lSystem -icf all -o %t %t.o
+# RUN: llvm-objdump -d --syms %t | FileCheck %s
+
+## When ICF has fewer than 1 Ki functions to segregate into equivalence classes,
+## it uses a sequential algorithm to avoid the overhead of threading.
+## At 1 Ki functions or more, when threading begins to pay-off, ICF employs its
+## parallel segregation algorithm. Here we generate 4 Ki functions to exercise
+## the parallel algorithm. There are 4 unique function bodies, each replicated
+## 1 Ki times. The resulting folded program should retain one instance for each
+## of the four unique functions.
+
+# CHECK-LABEL: SYMBOL TABLE:
+# CHECK: [[#%x,G0:]] g F __TEXT,__text _g000000
+# CHECK: [[#%x,G1:]] g F __TEXT,__text _g100000
+# CHECK: [[#%x,G2:]] g F __TEXT,__text _g200000
+# CHECK: [[#%x,G3:]] g F __TEXT,__text _g300000
+## . . . many intervening _gXXXXXX symbols
+# CHECK: [[#%x,G0]] g F __TEXT,__text _g033333
+# CHECK: [[#%x,G1]] g F __TEXT,__text _g133333
+# CHECK: [[#%x,G2]] g F __TEXT,__text _g233333
+# CHECK: [[#%x,G3]] g F __TEXT,__text _g333333
+
+# CHECK-LABEL: Disassembly of section __TEXT,__text:
+# CHECK-DAG: [[#%x,G0]] <_g033333>:
+# CHECK-DAG: [[#%x,G1]] <_g133333>:
+# CHECK-DAG: [[#%x,G2]] <_g233333>:
+# CHECK-DAG: [[#%x,G3]] <_g333333>:
+# CHECK-NOT: [[#]] <_g{{.*}}>:
+
+.subsections_via_symbols
+.text
+.p2align 2
+## gen_4: define four global functions _g0<c> .. _g3<c>; _gN<c> loads N into %eax and returns.
+.macro gen_4 c
+ .globl _g0\c, _g1\c, _g2\c, _g3\c
+ _g0\c:; movl $0, %eax; ret
+ _g1\c:; movl $1, %eax; ret
+ _g2\c:; movl $2, %eax; ret
+ _g3\c:; movl $3, %eax; ret
+.endm
+## gen_16: expand gen_4 four times, prepending a digit 0-3 to the name suffix (16 functions).
+.macro gen_16 c
+ gen_4 0\c
+ gen_4 1\c
+ gen_4 2\c
+ gen_4 3\c
+.endm
+## gen_64: expand gen_16 four times, prepending a digit 0-3 to the name suffix (64 functions).
+.macro gen_64 c
+ gen_16 0\c
+ gen_16 1\c
+ gen_16 2\c
+ gen_16 3\c
+.endm
+## gen_256: expand gen_64 four times, prepending a digit 0-3 to the name suffix (256 functions).
+.macro gen_256 c
+ gen_64 0\c
+ gen_64 1\c
+ gen_64 2\c
+ gen_64 3\c
+.endm
+## gen_1024: expand gen_256 four times, prepending a digit 0-3 to the name suffix (1024 functions).
+.macro gen_1024 c
+ gen_256 0\c
+ gen_256 1\c
+ gen_256 2\c
+ gen_256 3\c
+.endm
+## Four expansions give 4096 functions named _gXXXXXX (six digits, each 0-3); the first digit selects the body.
+gen_1024 0
+gen_1024 1
+gen_1024 2
+gen_1024 3
+
+.globl _main
+_main:  # entry point only; it does not call any of the generated _g functions
+ ret
diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s
new file mode 100644
index 0000000000000..3fbfcaebffd46
--- /dev/null
+++ b/lld/test/MachO/icf.s
@@ -0,0 +1,205 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; mkdir %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+# RUN: %lld -lSystem -icf all -o %t/main %t/main.o
+# RUN: llvm-objdump -d --syms %t/main | FileCheck %s
+
+# CHECK-LABEL: SYMBOL TABLE:
+# CHECK: [[#%x,MAIN:]] g F __TEXT,__text _main
+# CHECK: [[#%x,A:]] g F __TEXT,__text _a1
+# CHECK: [[#%x,A]] g F __TEXT,__text _a2
+# CHECK: [[#%x,C:]] g F __TEXT,__text _c
+# CHECK: [[#%x,D:]] g F __TEXT,__text _d
+# CHECK: [[#%x,E:]] g F __TEXT,__text _e
+# CHECK: [[#%x,F:]] g F __TEXT,__text _f
+# CHECK: [[#%x,G:]] g F __TEXT,__text _g
+# CHECK: [[#%x,SR:]] g F __TEXT,__text _sr1
+# CHECK: [[#%x,SR]] g F __TEXT,__text _sr2
+# CHECK: [[#%x,MR:]] g F __TEXT,__text _mr1
+# CHECK: [[#%x,MR]] g F __TEXT,__text _mr2
+### FIXME: Mutually-recursive functions with identical bodies (see below)
+# COM: [[#%x,XR:]] g F __TEXT,__text _xr1
+# COM: [[#%x,XR]] g F __TEXT,__text _xr2
+
+# CHECK-LABEL: Disassembly of section __TEXT,__text:
+# CHECK: [[#%x,MAIN]] <_main>:
+# CHECK-NEXT: callq 0x[[#%x,A]] <_a2>
+# CHECK-NEXT: callq 0x[[#%x,A]] <_a2>
+# CHECK-NEXT: callq 0x[[#%x,C]] <_c>
+# CHECK-NEXT: callq 0x[[#%x,D]] <_d>
+# CHECK-NEXT: callq 0x[[#%x,E]] <_e>
+# CHECK-NEXT: callq 0x[[#%x,F]] <_f>
+# CHECK-NEXT: callq 0x[[#%x,G]] <_g>
+# CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2>
+# CHECK-NEXT: callq 0x[[#%x,SR]] <_sr2>
+# CHECK-NEXT: callq 0x[[#%x,MR]] <_mr2>
+# CHECK-NEXT: callq 0x[[#%x,MR]] <_mr2>
+### FIXME: Mutually-recursive functions with identical bodies (see below)
+# COM-NEXT: callq 0x[[#%x,XR]] <_xr2>
+# COM-NEXT: callq 0x[[#%x,XR]] <_xr2>
+
+### TODO:
+### * Fold: funcs only differ in alignment
+### * No fold: func has personality/LSDA
+### * No fold: reloc references to absolute symbols with different values
+### * No fold: func is weak? preemptable?
+### * No fold: relocs to N_ALT_ENTRY symbols
+
+.subsections_via_symbols
+.text
+
+### Fold: _a1 & _a2 have identical bodies, flags, relocs
+
+.globl _a1
+.p2align 4, 0x90
+_a1:  # twin of _a2: same instructions and same referenced symbols
+ callq _d
+ mov ___nan@GOTPCREL(%rip), %rax  # GOT-relative reference to ___nan
+ callq ___isnan
+ movl $0, %eax
+ ret
+
+.globl _a2
+.p2align 4, 0x90
+_a2:  # twin of _a1: same instructions and same referenced symbols
+ callq _d
+ mov ___nan@GOTPCREL(%rip), %rax  # GOT-relative reference to ___nan
+ callq ___isnan
+ movl $0, %eax
+ ret
+
+### No fold: _c has slightly different body from _a1 & _a2
+
+.globl _c
+.p2align 4, 0x90
+_c:
+ callq _d
+ mov ___nan@GOTPCREL(%rip), %rax  # GOT-relative reference to ___nan
+ callq ___isnan
+ movl $1, %eax  # the only difference from _a1/_a2, which load $0
+ ret
+
+### No fold: _d has the same body as _a1 & _a2, but _d is recursive!
+
+.globl _d
+.p2align 4, 0x90
+_d:
+ callq _d  # self-call; in _a1/_a2 this same instruction targets a different function
+ mov ___nan@GOTPCREL(%rip), %rax  # GOT-relative reference to ___nan
+ callq ___isnan
+ movl $0, %eax
+ ret
+
+### No fold: the body of _e is longer
+
+.globl _e
+.p2align 4, 0x90
+_e:
+ callq _d
+ mov ___nan@GOTPCREL(%rip), %rax  # GOT-relative reference to ___nan
+ callq ___isnan
+ movl $0, %eax
+ ret
+ nop  # trailing instruction makes this body longer than _a1/_a2
+
+### No fold: the dylib symbols differ
+
+.globl _f
+.p2align 4, 0x90
+_f:
+ callq _d
+ mov ___inf@GOTPCREL(%rip), %rax  # references ___inf where _a1/_a2 reference ___nan
+ callq ___isnan
+ movl $0, %eax
+ ret
+
+.globl _g
+.p2align 4, 0x90
+_g:
+ callq _d
+ mov ___inf@GOTPCREL(%rip), %rax  # same GOT reference as _f
+ callq ___isinf  # calls ___isinf where _f calls ___isnan
+ movl $0, %eax
+ ret
+
+### Fold: Simple recursion
+
+.globl _sr1
+.p2align 4, 0x90
+_sr1:  # calls only itself; expected to share one address with _sr2 after folding
+ callq _sr1
+ movl $2, %eax
+ ret
+
+.globl _sr2
+.p2align 4, 0x90
+_sr2:  # calls only itself; expected to share one address with _sr1 after folding
+ callq _sr2
+ movl $2, %eax
+ ret
+
+### Fold: Mutually-recursive functions with symmetric bodies
+
+.globl _mr1
+.p2align 4, 0x90
+_mr1:  # self-call first, twin second: the same order _mr2 uses
+ callq _mr1 # call myself
+ callq _mr2 # call my twin
+ movl $1, %eax
+ ret
+
+.globl _mr2
+.p2align 4, 0x90
+_mr2:  # self-call first, twin second: the same order _mr1 uses
+ callq _mr2 # call myself
+ callq _mr1 # call my twin
+ movl $1, %eax
+ ret
+
+### Fold: Mutually-recursive functions with identical bodies
+###
+### FIXME: This test is currently broken. Recursive call sites have no relocs
+### and the non-zero displacement field is already written to the section
+### data, while non-recursive call sites use symbol relocs and section data
+### contains zeros in the displacement field. Thus, ICF's equalsConstant()
+### finds that the section data doesn't match.
+###
+### ELF folds this case properly because it emits symbol relocs for all calls,
+### even recursive ones.
+
+.globl _xr1
+.p2align 4, 0x90
+_xr1:  # self-call first, twin second; _xr2 makes the same two calls in the opposite roles
+ callq _xr1 # call myself
+ callq _xr2 # call my twin
+ movl $3, %eax
+ ret
+
+.globl _xr2
+.p2align 4, 0x90
+_xr2:  # twin call first, self-call second: textually the same call targets as _xr1
+ callq _xr1 # call my twin
+ callq _xr2 # call myself
+ movl $3, %eax
+ ret
+
+###
+
+.globl _main
+.p2align 4, 0x90
+_main:  # one call per function under test, in the order the disassembly expectations list them
+ callq _a1
+ callq _a2
+ callq _c
+ callq _d
+ callq _e
+ callq _f
+ callq _g
+ callq _sr1
+ callq _sr2
+ callq _mr1
+ callq _mr2
+ callq _xr1  # the _xr1/_xr2 expectations are currently disabled (see the FIXME notes in this file)
+ callq _xr2
+ ret
More information about the llvm-commits
mailing list