[lld] [lld-macho][arm64] Enhance safe ICF with thunk-based deduplication (PR #106573)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 13:13:06 PDT 2024
https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/106573
>From e2f8e5c4ed8be9a453f92227e04f30fbc9d1d3d3 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 29 Aug 2024 07:23:19 -0700
Subject: [PATCH 1/9] [lld-macho] Enhance safe ICF with thunk-based
deduplication
Currently, our `safe` ICF mode only merges non-address-significant code, leaving duplicate address-significant functions in the output. This patch introduces `safe_thunks` ICF mode, which keeps a single master copy of each function and replaces address-significant duplicates with thunks that branch to the master copy.
---
lld/MachO/Arch/ARM64.cpp | 23 +++
lld/MachO/Config.h | 1 +
lld/MachO/Driver.cpp | 10 +-
lld/MachO/ICF.cpp | 86 ++++++++++-
lld/MachO/Target.h | 10 ++
lld/test/MachO/icf-safe-thunks.ll | 241 ++++++++++++++++++++++++++++++
6 files changed, 367 insertions(+), 4 deletions(-)
create mode 100644 lld/test/MachO/icf-safe-thunks.ll
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index e192676394c965..30b6c27ff99f89 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -41,6 +41,10 @@ struct ARM64 : ARM64Common {
Symbol *objcMsgSend) const override;
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
+
+ virtual void initICFSafeThunkBody(InputSection *thunk,
+ InputSection *branchTarget) const override;
+ virtual uint32_t getICFSafeThunkSize() const override;
};
} // namespace
@@ -175,6 +179,25 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
/*offset=*/0, /*addend=*/0,
/*referent=*/funcSym);
}
+// Just a single direct branch to the target function.
+static constexpr uint32_t icfSafeThunkCode[] = {
+ 0x94000000, // 08: b target
+};
+
+void ARM64::initICFSafeThunkBody(InputSection *thunk,
+ InputSection *branchTarget) const {
+ // The base data here will not be itself modified, we'll just be adding a
+ // reloc below. So we can directly use the constexpr above as the data.
+ thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode),
+ sizeof(icfSafeThunkCode)};
+
+ thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
+ /*pcrel=*/true, /*length=*/2,
+ /*offset=*/0, /*addend=*/0,
+ /*referent=*/branchTarget);
+}
+
+uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }
ARM64::ARM64() : ARM64Common(LP64()) {
cpuType = CPU_TYPE_ARM64;
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 5beb0662ba7274..4e940693602c95 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -68,6 +68,7 @@ enum class ICFLevel {
unknown,
none,
safe,
+ safe_thunks,
all,
};
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 6a1ff96ed65697..7b23fdfea1303d 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -847,8 +847,15 @@ static ICFLevel getICFLevel(const ArgList &args) {
auto icfLevel = StringSwitch<ICFLevel>(icfLevelStr)
.Cases("none", "", ICFLevel::none)
.Case("safe", ICFLevel::safe)
+ .Case("safe_thunks", ICFLevel::safe_thunks)
.Case("all", ICFLevel::all)
.Default(ICFLevel::unknown);
+
+ if (icfLevel == ICFLevel::safe_thunks &&
+ !is_contained({AK_x86_64h, AK_arm64}, config->arch())) {
+ error("--icf=safe_thunks is only supported on arm64 targets");
+ }
+
if (icfLevel == ICFLevel::unknown) {
warn(Twine("unknown --icf=OPTION `") + icfLevelStr +
"', defaulting to `none'");
@@ -2104,7 +2111,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
// foldIdenticalLiterals before foldIdenticalSections.
foldIdenticalLiterals();
if (config->icfLevel != ICFLevel::none) {
- if (config->icfLevel == ICFLevel::safe)
+ if (config->icfLevel == ICFLevel::safe ||
+ config->icfLevel == ICFLevel::safe_thunks)
markAddrSigSymbols();
foldIdenticalSections(/*onlyCfStrings=*/false);
} else if (config->dedupStrings) {
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index fc786b571dc64f..cac5e673986829 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -45,6 +45,7 @@ class ICF {
const ConcatInputSection *ib);
bool equalsVariable(const ConcatInputSection *ia,
const ConcatInputSection *ib);
+ void applySafeThunksToRange(size_t begin, size_t end);
// ICF needs a copy of the inputs vector because its equivalence-class
// segregation algorithm destroys the proper sequence.
@@ -251,6 +252,63 @@ void ICF::forEachClassRange(size_t begin, size_t end,
}
}
+// Given a range of identical icfInputs's, replace address significant functions
+// with a thunk that is just a direct branch to the first function in the
+// series. This way we end up we keep only one main body of the function but we
+// still retain address uniqueness of rellevant functions by having them be a
+// direct branch thunk rather than contain a full copy of the actual function
+// body.
+void ICF::applySafeThunksToRange(size_t begin, size_t end) {
+ // If we need to create a unique ICF thunk, use the first section as the
+ // section that all thunks will branch to.
+ ConcatInputSection *masterIsec = icfInputs[begin];
+ uint32_t thunkSize = target->getICFSafeThunkSize();
+ static std::mutex thunkInsertionMutex;
+
+ uint32_t keepUniqueCount = masterIsec->keepUnique ? 1 : 0;
+ for (size_t i = begin + 1; i < end; ++i) {
+ ConcatInputSection *isec = icfInputs[i];
+ if (isec->keepUnique)
+ ++keepUniqueCount;
+
+ // We create thunks for the 2nd, 3rd, ... keepUnique sections. The first
+ // keepUnique section we leave as is - as it will not end up sharing an
+ // address with any other keepUnique section.
+ if (keepUniqueCount >= 2 && isec->keepUnique) {
+ // If the target to be folded is smaller than the thunk size, then just
+ // leave it as-is - creating the thunk would be a net loss.
+ if (isec->data.size() <= thunkSize)
+ return;
+
+ // applySafeThunksToRange is called from multiple threads, but
+ // `makeSyntheticInputSection` and `addInputSection` are not thread safe.
+ // So we need to guard them with a mutex.
+ ConcatInputSection *thunk;
+ {
+ std::lock_guard<std::mutex> lock(thunkInsertionMutex);
+ thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
+ addInputSection(thunk);
+ }
+
+ target->initICFSafeThunkBody(thunk, masterIsec);
+ thunk->foldIdentical(isec);
+
+ // Since we're folding the target function into a thunk, we need to adjust
+ // the symbols that now got relocated from the target function to the
+ // thunk.
+ // Since the thunk is only one branch, we move all symbols to offset 0 and
+ // make sure that the size of all non-zero-size symbols is equal to the
+ // size of the branch.
+ for (auto *sym : isec->symbols) {
+ if (sym->value != 0)
+ sym->value = 0;
+ if (sym->size != 0)
+ sym->size = thunkSize;
+ }
+ }
+ }
+}
+
// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
// with matching equivalence class
void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
@@ -335,9 +393,20 @@ void ICF::run() {
forEachClass([&](size_t begin, size_t end) {
if (end - begin < 2)
return;
+ bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;
+
+ // For ICF level safe_thunks, replace keepUnique function bodies with
+ // thunks. For all other ICF levles, directly merge the functions.
+ if (useSafeThunks)
+ applySafeThunksToRange(begin, end);
+
ConcatInputSection *beginIsec = icfInputs[begin];
- for (size_t i = begin + 1; i < end; ++i)
+ for (size_t i = begin + 1; i < end; ++i) {
+ // When using safe_thunks, keepUnique inputs are already handeled above
+ if (useSafeThunks && icfInputs[i]->keepUnique)
+ continue;
beginIsec->foldIdentical(icfInputs[i]);
+ }
});
}
@@ -421,11 +490,22 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
// can still fold it.
bool hasFoldableFlags = (isSelRefsSection(isec) ||
sectionType(isec->getFlags()) == MachO::S_REGULAR);
+
+ bool isCodeSec = isCodeSection(isec);
+
+ // When keepUnique is true, the section is not foldable. Unless we are at
+ // icf level safe_thunks, in which case we still want to fold code sections.
+ // When using safe_thunks we'll apply the safe_thunks logic at merge time
+ // based on the 'keepUnique' flag.
+ bool noUniqueRequirement =
+ !isec->keepUnique ||
+ ((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);
+
// FIXME: consider non-code __text sections as foldable?
bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
- (isCodeSection(isec) || isFoldableWithAddendsRemoved ||
+ (isCodeSec || isFoldableWithAddendsRemoved ||
isGccExceptTabSection(isec)) &&
- !isec->keepUnique && !isec->hasAltEntry &&
+ noUniqueRequirement && !isec->hasAltEntry &&
!isec->shouldOmitFromOutput() && hasFoldableFlags;
if (isFoldable) {
foldable.push_back(isec);
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index cc47ae4386b477..eaa0336e70cb6b 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -74,6 +74,16 @@ class TargetInfo {
uint64_t selrefVA,
Symbol *objcMsgSend) const = 0;
+ // Init 'thunk' so that it be a direct jump to 'branchTarget'.
+ virtual void initICFSafeThunkBody(InputSection *thunk,
+ InputSection *branchTarget) const {
+ llvm_unreachable("target does not support ICF safe thunks");
+ }
+
+ virtual uint32_t getICFSafeThunkSize() const {
+ llvm_unreachable("target does not support ICF safe thunks");
+ }
+
// Symbols may be referenced via either the GOT or the stubs section,
// depending on the relocation type. prepareSymbolRelocation() will set up the
// GOT/stubs entries, and resolveSymbolVA() will return the addresses of those
diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
new file mode 100644
index 00000000000000..2a0ca8314036f8
--- /dev/null
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -0,0 +1,241 @@
+; REQUIRES: aarch64
+
+; RUN: rm -rf %t; mkdir %t
+; RUN: llc -filetype=obj %s -O3 -o %t/icf-obj-safe-thunks.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
+; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/icf-safe.dylib %t/icf-obj-safe-thunks.o
+; RUN: llvm-objdump %t/icf-safe.dylib -d --macho | FileCheck %s --check-prefixes=CHECK-ARM64
+
+; CHECK-ARM64: (__TEXT,__text) section
+; CHECK-ARM64-NEXT: _func_unique_1:
+; CHECK-ARM64-NEXT: mov {{.*}}, #0x1
+;
+; CHECK-ARM64: _func_unique_2_canmerge:
+; CHECK-ARM64-NEXT: mov {{.*}}, #0x2
+;
+; CHECK-ARM64: _func_2identical_v1:
+; CHECK-ARM64-NEXT: mov {{.*}}, #0x2
+;
+; CHECK-ARM64: _func_3identical_v1:
+; CHECK-ARM64-NEXT: mov {{.*}}, #0x3
+;
+; CHECK-ARM64: _func_3identical_v1_canmerge:
+; CHECK-ARM64-NEXT: _func_3identical_v2_canmerge:
+; CHECK-ARM64-NEXT: _func_3identical_v3_canmerge:
+; CHECK-ARM64-NEXT: mov {{.*}}, #0x21
+;
+; CHECK-ARM64: _call_all_funcs:
+; CHECK-ARM64-NEXT: stp x29
+;
+; CHECK-ARM64: _take_func_addr:
+; CHECK-ARM64-NEXT: adr
+;
+; CHECK-ARM64: _func_2identical_v2:
+; CHECK-ARM64-NEXT: bl _func_unique_2_canmerge
+; CHECK-ARM64-NEXT: _func_3identical_v2:
+; CHECK-ARM64-NEXT: bl _func_3identical_v1
+; CHECK-ARM64-NEXT: _func_3identical_v3:
+; CHECK-ARM64-NEXT: bl _func_3identical_v1
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx11.0.0"
+
+ at g_val = global i8 0, align 1
+ at g_ptr = global ptr null, align 8
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_unique_1() #0 {
+entry:
+ store volatile i8 1, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_unique_2_canmerge() local_unnamed_addr #0 {
+entry:
+ store volatile i8 2, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_2identical_v1() #0 {
+entry:
+ store volatile i8 2, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_2identical_v2() #0 {
+entry:
+ store volatile i8 2, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v1() #0 {
+entry:
+ store volatile i8 3, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v2() #0 {
+entry:
+ store volatile i8 3, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v3() #0 {
+entry:
+ store volatile i8 3, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v1_canmerge() local_unnamed_addr #0 {
+entry:
+ store volatile i8 33, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v2_canmerge() local_unnamed_addr #0 {
+entry:
+ store volatile i8 33, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v3_canmerge() local_unnamed_addr #0 {
+entry:
+ store volatile i8 33, ptr @g_val, align 1, !tbaa !5
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync)
+define void @call_all_funcs() local_unnamed_addr #1 {
+entry:
+ tail call void @func_unique_1()
+ tail call void @func_unique_2_canmerge()
+ tail call void @func_2identical_v1()
+ tail call void @func_2identical_v2()
+ tail call void @func_3identical_v1()
+ tail call void @func_3identical_v2()
+ tail call void @func_3identical_v3()
+ tail call void @func_3identical_v1_canmerge()
+ tail call void @func_3identical_v2_canmerge()
+ tail call void @func_3identical_v3_canmerge()
+ ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @take_func_addr() local_unnamed_addr #0 {
+entry:
+ store volatile ptr @func_unique_1, ptr @g_ptr, align 8, !tbaa !8
+ store volatile ptr @func_2identical_v1, ptr @g_ptr, align 8, !tbaa !8
+ store volatile ptr @func_2identical_v2, ptr @g_ptr, align 8, !tbaa !8
+ store volatile ptr @func_3identical_v1, ptr @g_ptr, align 8, !tbaa !8
+ store volatile ptr @func_3identical_v2, ptr @g_ptr, align 8, !tbaa !8
+ store volatile ptr @func_3identical_v3, ptr @g_ptr, align 8, !tbaa !8
+ ret void
+}
+
+attributes #0 = { mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+attributes #1 = { mustprogress nofree noinline norecurse nounwind ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"clang"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C++ TBAA"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"any pointer", !6, i64 0}
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;; Generate the above LLVM IR with the below script ;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; #!/bin/bash
+; set -ex
+; TOOLCHAIN_BIN="llvm-project/build/Debug/bin"
+;
+; # Create icf-safe-thunks.cpp file
+; cat > icf-safe-thunks.cpp <<EOF
+;
+; #define ATTR __attribute__((noinline)) extern "C"
+; typedef unsigned long long ULL;
+;
+; volatile char g_val = 0;
+; void *volatile g_ptr = 0;
+;
+; ATTR void func_unique_1() {
+; g_val = 1;
+; }
+;
+; ATTR void func_unique_2_canmerge() {
+; g_val = 2;
+; }
+;
+; ATTR void func_2identical_v1() {
+; g_val = 2;
+; }
+;
+; ATTR void func_2identical_v2() {
+; g_val = 2;
+; }
+;
+; ATTR void func_3identical_v1() {
+; g_val = 3;
+; }
+;
+; ATTR void func_3identical_v2() {
+; g_val = 3;
+; }
+;
+; ATTR void func_3identical_v3() {
+; g_val = 3;
+; }
+;
+; ATTR void func_3identical_v1_canmerge() {
+; g_val = 33;
+; }
+;
+; ATTR void func_3identical_v2_canmerge() {
+; g_val = 33;
+; }
+;
+; ATTR void func_3identical_v3_canmerge() {
+; g_val = 33;
+; }
+;
+; ATTR void call_all_funcs() {
+; func_unique_1();
+; func_unique_2_canmerge();
+; func_2identical_v1();
+; func_2identical_v2();
+; func_3identical_v1();
+; func_3identical_v2();
+; func_3identical_v3();
+; func_3identical_v1_canmerge();
+; func_3identical_v2_canmerge();
+; func_3identical_v3_canmerge();
+; }
+;
+; ATTR void take_func_addr() {
+; g_ptr = (void*)func_unique_1;
+; g_ptr = (void*)func_2identical_v1;
+; g_ptr = (void*)func_2identical_v2;
+; g_ptr = (void*)func_3identical_v1;
+; g_ptr = (void*)func_3identical_v2;
+; g_ptr = (void*)func_3identical_v3;
+; }
+; EOF
+;
+; $TOOLCHAIN_BIN/clang -target arm64-apple-macos11.0 -S -emit-llvm \
+; icf-safe-thunks.cpp -O3 -o icf-safe-thunks.ll
>From 334d0966edf1866567000dfdc92eae2fede7ac66 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 29 Aug 2024 21:30:11 -0700
Subject: [PATCH 2/9] Fix "br" => "b" branch
---
lld/MachO/Arch/ARM64.cpp | 2 +-
lld/test/MachO/icf-safe-thunks.ll | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 30b6c27ff99f89..e6d2bc82099a49 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -181,7 +181,7 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
}
// Just a single direct branch to the target function.
static constexpr uint32_t icfSafeThunkCode[] = {
- 0x94000000, // 08: b target
+ 0x14000000, // 08: b target
};
void ARM64::initICFSafeThunkBody(InputSection *thunk,
diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
index 2a0ca8314036f8..98a3c0b0d2865d 100644
--- a/lld/test/MachO/icf-safe-thunks.ll
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -30,11 +30,11 @@
; CHECK-ARM64-NEXT: adr
;
; CHECK-ARM64: _func_2identical_v2:
-; CHECK-ARM64-NEXT: bl _func_unique_2_canmerge
+; CHECK-ARM64-NEXT: b _func_unique_2_canmerge
; CHECK-ARM64-NEXT: _func_3identical_v2:
-; CHECK-ARM64-NEXT: bl _func_3identical_v1
+; CHECK-ARM64-NEXT: b _func_3identical_v1
; CHECK-ARM64-NEXT: _func_3identical_v3:
-; CHECK-ARM64-NEXT: bl _func_3identical_v1
+; CHECK-ARM64-NEXT: b _func_3identical_v1
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"
>From 8bc9599e95f75231fb9eda656633a68333877431 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 30 Aug 2024 19:18:53 -0700
Subject: [PATCH 3/9] Address Feedback nr.1
---
lld/MachO/Arch/ARM64.cpp | 6 +-
lld/MachO/Driver.cpp | 3 +-
lld/MachO/ICF.cpp | 100 ++++++++++++++++--------------
lld/test/MachO/icf-safe-thunks.ll | 6 +-
4 files changed, 61 insertions(+), 54 deletions(-)
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index e6d2bc82099a49..195a8f09f47c1a 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -42,9 +42,9 @@ struct ARM64 : ARM64Common {
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
- virtual void initICFSafeThunkBody(InputSection *thunk,
- InputSection *branchTarget) const override;
- virtual uint32_t getICFSafeThunkSize() const override;
+ void initICFSafeThunkBody(InputSection *thunk,
+ InputSection *branchTarget) const override;
+ uint32_t getICFSafeThunkSize() const override;
};
} // namespace
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 7b23fdfea1303d..babbce2a81cd7d 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -851,8 +851,7 @@ static ICFLevel getICFLevel(const ArgList &args) {
.Case("all", ICFLevel::all)
.Default(ICFLevel::unknown);
- if (icfLevel == ICFLevel::safe_thunks &&
- !is_contained({AK_x86_64h, AK_arm64}, config->arch())) {
+ if ((icfLevel == ICFLevel::safe_thunks) && (config->arch() != AK_arm64)) {
error("--icf=safe_thunks is only supported on arm64 targets");
}
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index cac5e673986829..18efdfcfffb153 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -252,59 +252,69 @@ void ICF::forEachClassRange(size_t begin, size_t end,
}
}
-// Given a range of identical icfInputs's, replace address significant functions
+// Given a range of identical icfInputs, replace address significant functions
// with a thunk that is just a direct branch to the first function in the
-// series. This way we end up we keep only one main body of the function but we
-// still retain address uniqueness of rellevant functions by having them be a
-// direct branch thunk rather than contain a full copy of the actual function
+// series. This way we keep only one main body of the function but we still
+// retain the address uniqueness of relevant functions by having them be a
+// direct branch thunk rather than containing a full copy of the actual function
// body.
void ICF::applySafeThunksToRange(size_t begin, size_t end) {
- // If we need to create a unique ICF thunk, use the first section as the
- // section that all thunks will branch to.
- ConcatInputSection *masterIsec = icfInputs[begin];
+ // If the functions we're dealing with are smaller than the thunk size, then
+ // just leave them all as-is - creating thunks would be a net loss.
uint32_t thunkSize = target->getICFSafeThunkSize();
- static std::mutex thunkInsertionMutex;
+ if (icfInputs[begin]->data.size() <= thunkSize)
+ return;
+
+ // The standard ICF algorithm will merge all functions in the [begin + 1, end)
+ // range into icfInputs[begin].So, the body of the first function is always
+ // kept, even if it is not keepUnique. To make safe_thunks nicely play with
+ // this behavior, we ensure that the first function in the range is keepUnique.
+ if (!icfInputs[begin]->keepUnique) {
+ bool haveKeepUnique = false;
+ for (size_t i = begin + 1; i < end; ++i) {
+ if (icfInputs[i]->keepUnique) {
+ std::swap(icfInputs[begin], icfInputs[i]);
+ haveKeepUnique = true;
+ break;
+ }
+ }
+ // If we don't have keepUnique funcs, we can just return
+ if (!haveKeepUnique)
+ return;
+ }
- uint32_t keepUniqueCount = masterIsec->keepUnique ? 1 : 0;
+ // When creating a unique ICF thunk, use the first section as the section that
+ // all thunks will branch to.
+ ConcatInputSection *masterIsec = icfInputs[begin];
+
+ static std::mutex thunkInsertionMutex;
for (size_t i = begin + 1; i < end; ++i) {
ConcatInputSection *isec = icfInputs[i];
- if (isec->keepUnique)
- ++keepUniqueCount;
-
- // We create thunks for the 2nd, 3rd, ... keepUnique sections. The first
- // keepUnique section we leave as is - as it will not end up sharing an
- // address with any other keepUnique section.
- if (keepUniqueCount >= 2 && isec->keepUnique) {
- // If the target to be folded is smaller than the thunk size, then just
- // leave it as-is - creating the thunk would be a net loss.
- if (isec->data.size() <= thunkSize)
- return;
-
- // applySafeThunksToRange is called from multiple threads, but
- // `makeSyntheticInputSection` and `addInputSection` are not thread safe.
- // So we need to guard them with a mutex.
- ConcatInputSection *thunk;
- {
- std::lock_guard<std::mutex> lock(thunkInsertionMutex);
- thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
- addInputSection(thunk);
- }
+ if (!isec->keepUnique)
+ continue;
- target->initICFSafeThunkBody(thunk, masterIsec);
- thunk->foldIdentical(isec);
-
- // Since we're folding the target function into a thunk, we need to adjust
- // the symbols that now got relocated from the target function to the
- // thunk.
- // Since the thunk is only one branch, we move all symbols to offset 0 and
- // make sure that the size of all non-zero-size symbols is equal to the
- // size of the branch.
- for (auto *sym : isec->symbols) {
- if (sym->value != 0)
- sym->value = 0;
- if (sym->size != 0)
- sym->size = thunkSize;
- }
+ // applySafeThunksToRange is called from multiple threads, but
+ // `makeSyntheticInputSection` and `addInputSection` are not thread safe. So
+ // we need to guard them with a mutex.
+ ConcatInputSection *thunk;
+ {
+ std::lock_guard<std::mutex> lock(thunkInsertionMutex);
+ thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
+ addInputSection(thunk);
+ }
+
+ target->initICFSafeThunkBody(thunk, masterIsec);
+ thunk->foldIdentical(isec);
+
+ // Since we're folding the target function into a thunk, we need to adjust
+ // the symbols that now got relocated from the target function to the thunk.
+ // Since the thunk is only one branch, we move all symbols to offset 0 and
+ // make sure that the size of all non-zero-size symbols is equal to the size
+ // of the branch.
+ for (auto *sym : isec->symbols) {
+ sym->value = 0;
+ if (sym->size != 0)
+ sym->size = thunkSize;
}
}
}
diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
index 98a3c0b0d2865d..23dd271d6d75a3 100644
--- a/lld/test/MachO/icf-safe-thunks.ll
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -10,9 +10,7 @@
; CHECK-ARM64-NEXT: mov {{.*}}, #0x1
;
; CHECK-ARM64: _func_unique_2_canmerge:
-; CHECK-ARM64-NEXT: mov {{.*}}, #0x2
-;
-; CHECK-ARM64: _func_2identical_v1:
+; CHECK-ARM64-NEXT: _func_2identical_v1:
; CHECK-ARM64-NEXT: mov {{.*}}, #0x2
;
; CHECK-ARM64: _func_3identical_v1:
@@ -30,7 +28,7 @@
; CHECK-ARM64-NEXT: adr
;
; CHECK-ARM64: _func_2identical_v2:
-; CHECK-ARM64-NEXT: b _func_unique_2_canmerge
+; CHECK-ARM64-NEXT: b _func_2identical_v1
; CHECK-ARM64-NEXT: _func_3identical_v2:
; CHECK-ARM64-NEXT: b _func_3identical_v1
; CHECK-ARM64-NEXT: _func_3identical_v3:
>From 4c69771b0144965f799dd3fb299512e9faddc237 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 30 Aug 2024 19:24:46 -0700
Subject: [PATCH 4/9] Fix Formatting
---
lld/MachO/ICF.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 18efdfcfffb153..f454885a51a4aa 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -268,7 +268,8 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
// The standard ICF algorithm will merge all functions in the [begin + 1, end)
// range into icfInputs[begin].So, the body of the first function is always
// kept, even if it is not keepUnique. To make safe_thunks nicely play with
- // this behavior, we ensure that the first function in the range is keepUnique.
+ // this behavior, we ensure that the first function in the range is
+ // keepUnique.
if (!icfInputs[begin]->keepUnique) {
bool haveKeepUnique = false;
for (size_t i = begin + 1; i < end; ++i) {
>From d48c15758f43268782d3a446a07fcd77fcd9adfb Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 08:20:30 -0700
Subject: [PATCH 5/9] Fix map file, remove thread locking
---
lld/MachO/ICF.cpp | 37 ++++++++++++++++++-------------
lld/MachO/InputSection.cpp | 5 +++--
lld/MachO/InputSection.h | 4 +++-
lld/MachO/MapFile.cpp | 2 +-
lld/MachO/Symbols.cpp | 2 +-
lld/MachO/Symbols.h | 9 ++++++--
lld/MachO/SyntheticSections.cpp | 3 ++-
lld/test/MachO/icf-safe-thunks.ll | 17 +++++++++++++-
8 files changed, 55 insertions(+), 24 deletions(-)
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index f454885a51a4aa..bea2318a69c87e 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -266,7 +266,7 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
return;
// The standard ICF algorithm will merge all functions in the [begin + 1, end)
- // range into icfInputs[begin].So, the body of the first function is always
+ // range into icfInputs[begin], so the body of the first function is always
// kept, even if it is not keepUnique. To make safe_thunks nicely play with
// this behavior, we ensure that the first function in the range is
// keepUnique.
@@ -288,31 +288,26 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
// all thunks will branch to.
ConcatInputSection *masterIsec = icfInputs[begin];
- static std::mutex thunkInsertionMutex;
for (size_t i = begin + 1; i < end; ++i) {
ConcatInputSection *isec = icfInputs[i];
+ // When we're done processing keepUnique entries, we can stop. Sorting
+ // guaratees that all keepUnique will be at the front.
if (!isec->keepUnique)
- continue;
+ break;
- // applySafeThunksToRange is called from multiple threads, but
- // `makeSyntheticInputSection` and `addInputSection` are not thread safe. So
- // we need to guard them with a mutex.
ConcatInputSection *thunk;
- {
- std::lock_guard<std::mutex> lock(thunkInsertionMutex);
- thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
- addInputSection(thunk);
- }
+ thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
+ addInputSection(thunk);
target->initICFSafeThunkBody(thunk, masterIsec);
- thunk->foldIdentical(isec);
+ thunk->foldIdentical(isec, Symbol::ICFFoldKind::Folded_Thunk);
// Since we're folding the target function into a thunk, we need to adjust
// the symbols that now got relocated from the target function to the thunk.
// Since the thunk is only one branch, we move all symbols to offset 0 and
// make sure that the size of all non-zero-size symbols is equal to the size
// of the branch.
- for (auto *sym : isec->symbols) {
+ for (auto *sym : thunk->symbols) {
sym->value = 0;
if (sym->size != 0)
sym->size = thunkSize;
@@ -381,6 +376,12 @@ void ICF::run() {
llvm::stable_sort(
icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
+ // When using safe_thunks, ensure that we first sort by icfEqClass and
+ // then by keepUnique (descending). This guarantees that within an
+ // equivalence class, the keepUnique inputs are always first.
+ if (config->icfLevel == ICFLevel::safe_thunks)
+ if (a->icfEqClass[0] == b->icfEqClass[0])
+ return a->keepUnique > b->keepUnique;
return a->icfEqClass[0] < b->icfEqClass[0];
});
forEachClass([&](size_t begin, size_t end) {
@@ -400,6 +401,14 @@ void ICF::run() {
log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
}
+ // When using safe_thunks, we need to create thunks for all keepUnique
+ // functions that can be deduplicated. Since we're creating / adding new
+ // InputSections, we can't paralellize this.
+ if (config->icfLevel == ICFLevel::safe_thunks)
+ forEachClassRange(0, icfInputs.size(), [&](size_t begin, size_t end) {
+ applySafeThunksToRange(begin, end);
+ });
+
// Fold sections within equivalence classes
forEachClass([&](size_t begin, size_t end) {
if (end - begin < 2)
@@ -408,8 +417,6 @@ void ICF::run() {
// For ICF level safe_thunks, replace keepUnique function bodies with
// thunks. For all other ICF levles, directly merge the functions.
- if (useSafeThunks)
- applySafeThunksToRange(begin, end);
ConcatInputSection *beginIsec = icfInputs[begin];
for (size_t i = begin + 1; i < end; ++i) {
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index a9b93e07a60133..64c584920defba 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -190,13 +190,14 @@ const Reloc *InputSection::getRelocAt(uint32_t off) const {
return &*it;
}
-void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
+void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
+ Symbol::ICFFoldKind foldKind) {
align = std::max(align, copy->align);
copy->live = false;
copy->wasCoalesced = true;
copy->replacement = this;
for (auto ©Sym : copy->symbols)
- copySym->wasIdenticalCodeFolded = true;
+ copySym->identicalCodeFoldingKind = foldKind;
symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
copy->symbols.clear();
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 0f389e50425a32..f289864a75ca08 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -117,7 +117,9 @@ class ConcatInputSection final : public InputSection {
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
void writeTo(uint8_t *buf);
- void foldIdentical(ConcatInputSection *redundant);
+ void foldIdentical(
+ ConcatInputSection *redundant,
+ Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Folded_Body);
ConcatInputSection *canonical() override {
return replacement ? replacement : this;
}
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 5bcaeca48da2a2..20dbd1ed9d033e 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -156,7 +156,7 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
}
static uint64_t getSymSizeForMap(Defined *sym) {
- if (sym->wasIdenticalCodeFolded)
+ if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Folded_Body)
return 0;
return sym->size;
}
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 7bac78a59e4fec..910d2393eaf98d 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -60,7 +60,7 @@ Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
bool interposable)
: Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef),
privateExtern(isPrivateExtern), includeInSymtab(includeInSymtab),
- wasIdenticalCodeFolded(false),
+ identicalCodeFoldingKind(ICFFoldKind::None),
referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden),
weakDef(isWeakDef), external(isExternal), originalIsec(isec),
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index be5d135b8cd54f..70ef59557a4038 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -41,6 +41,11 @@ class Symbol {
AliasKind,
};
+ // Enum that describes the kind of ICF folding that a symbol is involved in.
+ // We need to keep track of this to correctly display symbol sizes in the map
+ // file.
+ enum ICFFoldKind { None, Folded_Body, Folded_Thunk };
+
virtual ~Symbol() {}
Kind kind() const { return symbolKind; }
@@ -154,8 +159,8 @@ class Defined : public Symbol {
bool privateExtern : 1;
// Whether this symbol should appear in the output symbol table.
bool includeInSymtab : 1;
- // Whether this symbol was folded into a different symbol during ICF.
- bool wasIdenticalCodeFolded : 1;
+ // The ICF folding kind of this symbol: None / Folded / Folded_Thunk.
+ ICFFoldKind identicalCodeFoldingKind : 2;
// Symbols marked referencedDynamically won't be removed from the output's
// symbol table by tools like strip. In theory, this could be set on arbitrary
// symbols in input object files. In practice, it's used solely for the
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 939e9b286d77f5..cfae3f82bb25a8 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1231,7 +1231,8 @@ void SymtabSection::emitStabs() {
// Constant-folded symbols go in the executable's symbol table, but don't
// get a stabs entry unless --keep-icf-stabs flag is specified
- if (!config->keepICFStabs && defined->wasIdenticalCodeFolded)
+ if (!config->keepICFStabs &&
+ defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Folded_Body)
continue;
ObjFile *file = defined->getObjectFile();
diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
index 23dd271d6d75a3..238e90f952e160 100644
--- a/lld/test/MachO/icf-safe-thunks.ll
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -2,8 +2,9 @@
; RUN: rm -rf %t; mkdir %t
; RUN: llc -filetype=obj %s -O3 -o %t/icf-obj-safe-thunks.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
-; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/icf-safe.dylib %t/icf-obj-safe-thunks.o
+; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/icf-safe.dylib -map %t/icf-safe.map %t/icf-obj-safe-thunks.o
; RUN: llvm-objdump %t/icf-safe.dylib -d --macho | FileCheck %s --check-prefixes=CHECK-ARM64
+; RUN: cat %t/icf-safe.map | FileCheck %s --check-prefixes=CHECK-ARM64-MAP
; CHECK-ARM64: (__TEXT,__text) section
; CHECK-ARM64-NEXT: _func_unique_1:
@@ -34,6 +35,20 @@
; CHECK-ARM64-NEXT: _func_3identical_v3:
; CHECK-ARM64-NEXT: b _func_3identical_v1
+
+; CHECK-ARM64-MAP: 0x00000010 [ 2] _func_unique_1
+; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_2identical_v1
+; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_unique_2_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_3identical_v1
+; CHECK-ARM64-MAP-NEXT: 0x00000010 [ 2] _func_3identical_v1_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v2_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v3_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000034 [ 2] _call_all_funcs
+; CHECK-ARM64-MAP-NEXT: 0x00000050 [ 2] _take_func_addr
+; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_2identical_v2
+; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_3identical_v2
+; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_3identical_v3
+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"
>From fe7d0262a1dd500195a13c75a0a7cf038c8a6f31 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 08:51:49 -0700
Subject: [PATCH 6/9] Add assert
---
lld/MachO/ICF.cpp | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index bea2318a69c87e..637affd82f080f 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -421,8 +421,14 @@ void ICF::run() {
ConcatInputSection *beginIsec = icfInputs[begin];
for (size_t i = begin + 1; i < end; ++i) {
// When using safe_thunks, keepUnique inputs are already handeled above
- if (useSafeThunks && icfInputs[i]->keepUnique)
+ if (useSafeThunks && icfInputs[i]->keepUnique) {
+ // Assert that all keepUnique input sections have been replaced with
+ // thunks
+ assert(!icfInputs[i]->live);
+ assert(icfInputs[i]->replacement->data.size() ==
+ target->getICFSafeThunkSize());
continue;
+ }
beginIsec->foldIdentical(icfInputs[i]);
}
});
>From 9f1a42f357c3cfab10654309bf47610b8dfeaa07 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 10:55:39 -0700
Subject: [PATCH 7/9] Fix assert
---
lld/MachO/ICF.cpp | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 637affd82f080f..f6afd48d294159 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -420,13 +420,14 @@ void ICF::run() {
ConcatInputSection *beginIsec = icfInputs[begin];
for (size_t i = begin + 1; i < end; ++i) {
- // When using safe_thunks, keepUnique inputs are already handeled above
+ // Skip keepUnique inputs when using safe_thunks (already handeled above)
if (useSafeThunks && icfInputs[i]->keepUnique) {
- // Assert that all keepUnique input sections have been replaced with
- // thunks
- assert(!icfInputs[i]->live);
- assert(icfInputs[i]->replacement->data.size() ==
- target->getICFSafeThunkSize());
+ // Assert keepUnique sections are either small or replaced with thunks.
+ assert(!icfInputs[i]->live ||
+ icfInputs[i]->data.size() <= target->getICFSafeThunkSize());
+ assert(!icfInputs[i]->replacement ||
+ icfInputs[i]->replacement->data.size() ==
+ target->getICFSafeThunkSize());
continue;
}
beginIsec->foldIdentical(icfInputs[i]);
>From 5ed74eee454f3ea24d78488e3c121a1322a707b6 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 12:49:07 -0700
Subject: [PATCH 8/9] Address feedback
---
lld/MachO/ICF.cpp | 25 +++----------------------
lld/MachO/InputSection.h | 2 +-
lld/MachO/MapFile.cpp | 2 +-
lld/MachO/Symbols.h | 14 +++++++++-----
lld/MachO/SyntheticSections.cpp | 2 +-
5 files changed, 15 insertions(+), 30 deletions(-)
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index f6afd48d294159..2ff962b06e3679 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -265,25 +265,6 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
if (icfInputs[begin]->data.size() <= thunkSize)
return;
- // The standard ICF algorithm will merge all functions in the [begin + 1, end)
- // range into icfInputs[begin], so the body of the first function is always
- // kept, even if it is not keepUnique. To make safe_thunks nicely play with
- // this behavior, we ensure that the first function in the range is
- // keepUnique.
- if (!icfInputs[begin]->keepUnique) {
- bool haveKeepUnique = false;
- for (size_t i = begin + 1; i < end; ++i) {
- if (icfInputs[i]->keepUnique) {
- std::swap(icfInputs[begin], icfInputs[i]);
- haveKeepUnique = true;
- break;
- }
- }
- // If we don't have keepUnique funcs, we can just return
- if (!haveKeepUnique)
- return;
- }
-
// When creating a unique ICF thunk, use the first section as the section that
// all thunks will branch to.
ConcatInputSection *masterIsec = icfInputs[begin];
@@ -295,12 +276,12 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
if (!isec->keepUnique)
break;
- ConcatInputSection *thunk;
- thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
+ ConcatInputSection *thunk =
+ makeSyntheticInputSection(isec->getSegName(), isec->getName());
addInputSection(thunk);
target->initICFSafeThunkBody(thunk, masterIsec);
- thunk->foldIdentical(isec, Symbol::ICFFoldKind::Folded_Thunk);
+ thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);
// Since we're folding the target function into a thunk, we need to adjust
// the symbols that now got relocated from the target function to the thunk.
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index f289864a75ca08..0785d5d90f1084 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -119,7 +119,7 @@ class ConcatInputSection final : public InputSection {
void foldIdentical(
ConcatInputSection *redundant,
- Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Folded_Body);
+ Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
ConcatInputSection *canonical() override {
return replacement ? replacement : this;
}
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 20dbd1ed9d033e..9c0621622ae2f0 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -156,7 +156,7 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
}
static uint64_t getSymSizeForMap(Defined *sym) {
- if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Folded_Body)
+ if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
return 0;
return sym->size;
}
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 70ef59557a4038..e355e31bb36d6b 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -41,10 +41,14 @@ class Symbol {
AliasKind,
};
- // Enum that describes the kind of ICF folding that a symbol is involved in.
- // We need to keep track of this to correctly display symbol sizes in the map
- // file.
- enum ICFFoldKind { None, Folded_Body, Folded_Thunk };
+ // Enum that describes the type of Identical Code Folding (ICF) applied to a
+ // symbol. This information is crucial for accurately representing symbol
+ // sizes in the map file.
+ enum ICFFoldKind {
+ None, // No folding is applied.
+ Body, // The entire body (function or data) is folded.
+ Thunk // The function body is folded into a single branch thunk.
+ };
virtual ~Symbol() {}
@@ -159,7 +163,7 @@ class Defined : public Symbol {
bool privateExtern : 1;
// Whether this symbol should appear in the output symbol table.
bool includeInSymtab : 1;
- // The ICF folding kind of this symbol: None / Folded / Folded_Thunk.
+ // The ICF folding kind of this symbol: None / Body / Thunk.
ICFFoldKind identicalCodeFoldingKind : 2;
// Symbols marked referencedDynamically won't be removed from the output's
// symbol table by tools like strip. In theory, this could be set on arbitrary
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index cfae3f82bb25a8..7b0078856c5e22 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1232,7 +1232,7 @@ void SymtabSection::emitStabs() {
// Constant-folded symbols go in the executable's symbol table, but don't
// get a stabs entry unless --keep-icf-stabs flag is specified
if (!config->keepICFStabs &&
- defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Folded_Body)
+ defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
continue;
ObjFile *file = defined->getObjectFile();
>From 88d7983964385718d69213af22bc305cf14d33c6 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 13:12:48 -0700
Subject: [PATCH 9/9] Fix formatting
---
lld/MachO/InputSection.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 0785d5d90f1084..4e238d8ef77793 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -117,9 +117,8 @@ class ConcatInputSection final : public InputSection {
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
void writeTo(uint8_t *buf);
- void foldIdentical(
- ConcatInputSection *redundant,
- Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
+ void foldIdentical(ConcatInputSection *redundant,
+ Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
ConcatInputSection *canonical() override {
return replacement ? replacement : this;
}
More information about the llvm-commits
mailing list