[lld] [lld-macho][arm64] Enhance safe ICF with thunk-based deduplication (PR #106573)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 5 13:13:06 PDT 2024


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/106573

>From e2f8e5c4ed8be9a453f92227e04f30fbc9d1d3d3 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 29 Aug 2024 07:23:19 -0700
Subject: [PATCH 1/9] [lld-macho] Enhance safe ICF with thunk-based
 deduplication

Currently, our `safe` ICF mode only merges non-address-significant code, leaving duplicate address-significant functions in the output. This patch introduces `safe_thunks` ICF mode, which keeps a single master copy of each function and replaces address-significant duplicates with thunks that branch to the master copy.
---
 lld/MachO/Arch/ARM64.cpp          |  23 +++
 lld/MachO/Config.h                |   1 +
 lld/MachO/Driver.cpp              |  10 +-
 lld/MachO/ICF.cpp                 |  86 ++++++++++-
 lld/MachO/Target.h                |  10 ++
 lld/test/MachO/icf-safe-thunks.ll | 241 ++++++++++++++++++++++++++++++
 6 files changed, 367 insertions(+), 4 deletions(-)
 create mode 100644 lld/test/MachO/icf-safe-thunks.ll

diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index e192676394c965..30b6c27ff99f89 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -41,6 +41,10 @@ struct ARM64 : ARM64Common {
                             Symbol *objcMsgSend) const override;
   void populateThunk(InputSection *thunk, Symbol *funcSym) override;
   void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
+
+  virtual void initICFSafeThunkBody(InputSection *thunk,
+                                    InputSection *branchTarget) const override;
+  virtual uint32_t getICFSafeThunkSize() const override;
 };
 
 } // namespace
@@ -175,6 +179,25 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
                              /*offset=*/0, /*addend=*/0,
                              /*referent=*/funcSym);
 }
+// Just a single direct branch to the target function.
+static constexpr uint32_t icfSafeThunkCode[] = {
+    0x94000000, // 08: b    target
+};
+
+void ARM64::initICFSafeThunkBody(InputSection *thunk,
+                                 InputSection *branchTarget) const {
+  // The base data here will not be itself modified, we'll just be adding a
+  // reloc below. So we can directly use the constexpr above as the data.
+  thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode),
+                 sizeof(icfSafeThunkCode)};
+
+  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
+                             /*pcrel=*/true, /*length=*/2,
+                             /*offset=*/0, /*addend=*/0,
+                             /*referent=*/branchTarget);
+}
+
+uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }
 
 ARM64::ARM64() : ARM64Common(LP64()) {
   cpuType = CPU_TYPE_ARM64;
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 5beb0662ba7274..4e940693602c95 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -68,6 +68,7 @@ enum class ICFLevel {
   unknown,
   none,
   safe,
+  safe_thunks,
   all,
 };
 
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 6a1ff96ed65697..7b23fdfea1303d 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -847,8 +847,15 @@ static ICFLevel getICFLevel(const ArgList &args) {
   auto icfLevel = StringSwitch<ICFLevel>(icfLevelStr)
                       .Cases("none", "", ICFLevel::none)
                       .Case("safe", ICFLevel::safe)
+                      .Case("safe_thunks", ICFLevel::safe_thunks)
                       .Case("all", ICFLevel::all)
                       .Default(ICFLevel::unknown);
+
+  if (icfLevel == ICFLevel::safe_thunks &&
+      !is_contained({AK_x86_64h, AK_arm64}, config->arch())) {
+    error("--icf=safe_thunks is only supported on arm64 targets");
+  }
+
   if (icfLevel == ICFLevel::unknown) {
     warn(Twine("unknown --icf=OPTION `") + icfLevelStr +
          "', defaulting to `none'");
@@ -2104,7 +2111,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
     // foldIdenticalLiterals before foldIdenticalSections.
     foldIdenticalLiterals();
     if (config->icfLevel != ICFLevel::none) {
-      if (config->icfLevel == ICFLevel::safe)
+      if (config->icfLevel == ICFLevel::safe ||
+          config->icfLevel == ICFLevel::safe_thunks)
         markAddrSigSymbols();
       foldIdenticalSections(/*onlyCfStrings=*/false);
     } else if (config->dedupStrings) {
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index fc786b571dc64f..cac5e673986829 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -45,6 +45,7 @@ class ICF {
                       const ConcatInputSection *ib);
   bool equalsVariable(const ConcatInputSection *ia,
                       const ConcatInputSection *ib);
+  void applySafeThunksToRange(size_t begin, size_t end);
 
   // ICF needs a copy of the inputs vector because its equivalence-class
   // segregation algorithm destroys the proper sequence.
@@ -251,6 +252,63 @@ void ICF::forEachClassRange(size_t begin, size_t end,
   }
 }
 
+// Given a range of identical icfInputs's, replace address significant functions
+// with a thunk that is just a direct branch to the first function in the
+// series. This way we end up we keep only one main body of the function but we
+// still retain address uniqueness of rellevant functions by having them be a
+// direct branch thunk rather than contain a full copy of the actual function
+// body.
+void ICF::applySafeThunksToRange(size_t begin, size_t end) {
+  // If we need to create a unique ICF thunk, use the first section as the
+  // section that all thunks will branch to.
+  ConcatInputSection *masterIsec = icfInputs[begin];
+  uint32_t thunkSize = target->getICFSafeThunkSize();
+  static std::mutex thunkInsertionMutex;
+
+  uint32_t keepUniqueCount = masterIsec->keepUnique ? 1 : 0;
+  for (size_t i = begin + 1; i < end; ++i) {
+    ConcatInputSection *isec = icfInputs[i];
+    if (isec->keepUnique)
+      ++keepUniqueCount;
+
+    // We create thunks for the 2nd, 3rd, ... keepUnique sections. The first
+    // keepUnique section we leave as is - as it will not end up sharing an
+    // address with any other keepUnique section.
+    if (keepUniqueCount >= 2 && isec->keepUnique) {
+      // If the target to be folded is smaller than the thunk size, then just
+      // leave it as-is - creating the thunk would be a net loss.
+      if (isec->data.size() <= thunkSize)
+        return;
+
+      // applySafeThunksToRange is called from multiple threads, but
+      // `makeSyntheticInputSection` and `addInputSection` are not thread safe.
+      // So we need to guard them with a mutex.
+      ConcatInputSection *thunk;
+      {
+        std::lock_guard<std::mutex> lock(thunkInsertionMutex);
+        thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
+        addInputSection(thunk);
+      }
+
+      target->initICFSafeThunkBody(thunk, masterIsec);
+      thunk->foldIdentical(isec);
+
+      // Since we're folding the target function into a thunk, we need to adjust
+      // the symbols that now got relocated from the target function to the
+      // thunk.
+      // Since the thunk is only one branch, we move all symbols to offset 0 and
+      // make sure that the size of all non-zero-size symbols is equal to the
+      // size of the branch.
+      for (auto *sym : isec->symbols) {
+        if (sym->value != 0)
+          sym->value = 0;
+        if (sym->size != 0)
+          sym->size = thunkSize;
+      }
+    }
+  }
+}
+
 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
 // with matching equivalence class
 void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
@@ -335,9 +393,20 @@ void ICF::run() {
   forEachClass([&](size_t begin, size_t end) {
     if (end - begin < 2)
       return;
+    bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;
+
+    // For ICF level safe_thunks, replace keepUnique function bodies with
+    // thunks. For all other ICF levles, directly merge the functions.
+    if (useSafeThunks)
+      applySafeThunksToRange(begin, end);
+
     ConcatInputSection *beginIsec = icfInputs[begin];
-    for (size_t i = begin + 1; i < end; ++i)
+    for (size_t i = begin + 1; i < end; ++i) {
+      // When using safe_thunks, keepUnique inputs are already handeled above
+      if (useSafeThunks && icfInputs[i]->keepUnique)
+        continue;
       beginIsec->foldIdentical(icfInputs[i]);
+    }
   });
 }
 
@@ -421,11 +490,22 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
     // can still fold it.
     bool hasFoldableFlags = (isSelRefsSection(isec) ||
                              sectionType(isec->getFlags()) == MachO::S_REGULAR);
+
+    bool isCodeSec = isCodeSection(isec);
+
+    // When keepUnique is true, the section is not foldable. Unless we are at
+    // icf level safe_thunks, in which case we still want to fold code sections.
+    // When using safe_thunks we'll apply the safe_thunks logic at merge time
+    // based on the 'keepUnique' flag.
+    bool noUniqueRequirement =
+        !isec->keepUnique ||
+        ((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);
+
     // FIXME: consider non-code __text sections as foldable?
     bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
-                      (isCodeSection(isec) || isFoldableWithAddendsRemoved ||
+                      (isCodeSec || isFoldableWithAddendsRemoved ||
                        isGccExceptTabSection(isec)) &&
-                      !isec->keepUnique && !isec->hasAltEntry &&
+                      noUniqueRequirement && !isec->hasAltEntry &&
                       !isec->shouldOmitFromOutput() && hasFoldableFlags;
     if (isFoldable) {
       foldable.push_back(isec);
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index cc47ae4386b477..eaa0336e70cb6b 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -74,6 +74,16 @@ class TargetInfo {
                                     uint64_t selrefVA,
                                     Symbol *objcMsgSend) const = 0;
 
+  // Init 'thunk' so that it be a direct jump to 'branchTarget'.
+  virtual void initICFSafeThunkBody(InputSection *thunk,
+                                    InputSection *branchTarget) const {
+    llvm_unreachable("target does not support ICF safe thunks");
+  }
+
+  virtual uint32_t getICFSafeThunkSize() const {
+    llvm_unreachable("target does not support ICF safe thunks");
+  }
+
   // Symbols may be referenced via either the GOT or the stubs section,
   // depending on the relocation type. prepareSymbolRelocation() will set up the
   // GOT/stubs entries, and resolveSymbolVA() will return the addresses of those
diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
new file mode 100644
index 00000000000000..2a0ca8314036f8
--- /dev/null
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -0,0 +1,241 @@
+; REQUIRES: aarch64
+
+; RUN: rm -rf %t; mkdir %t
+; RUN: llc -filetype=obj %s -O3 -o %t/icf-obj-safe-thunks.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
+; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/icf-safe.dylib %t/icf-obj-safe-thunks.o
+; RUN: llvm-objdump %t/icf-safe.dylib -d --macho | FileCheck %s --check-prefixes=CHECK-ARM64
+
+; CHECK-ARM64:        (__TEXT,__text) section
+; CHECK-ARM64-NEXT:   _func_unique_1:
+; CHECK-ARM64-NEXT:        mov {{.*}}, #0x1
+;
+; CHECK-ARM64:        _func_unique_2_canmerge:
+; CHECK-ARM64-NEXT:        mov {{.*}}, #0x2
+;
+; CHECK-ARM64:        _func_2identical_v1:
+; CHECK-ARM64-NEXT:        mov {{.*}}, #0x2
+;
+; CHECK-ARM64:        _func_3identical_v1:
+; CHECK-ARM64-NEXT:        mov {{.*}}, #0x3
+;
+; CHECK-ARM64:        _func_3identical_v1_canmerge:
+; CHECK-ARM64-NEXT:   _func_3identical_v2_canmerge:
+; CHECK-ARM64-NEXT:   _func_3identical_v3_canmerge:
+; CHECK-ARM64-NEXT:        mov {{.*}}, #0x21
+;
+; CHECK-ARM64:        _call_all_funcs:
+; CHECK-ARM64-NEXT:        stp  x29
+;
+; CHECK-ARM64:        _take_func_addr:
+; CHECK-ARM64-NEXT:        adr
+;
+; CHECK-ARM64:        _func_2identical_v2:
+; CHECK-ARM64-NEXT:         bl  _func_unique_2_canmerge
+; CHECK-ARM64-NEXT:   _func_3identical_v2:
+; CHECK-ARM64-NEXT:        bl  _func_3identical_v1
+; CHECK-ARM64-NEXT:   _func_3identical_v3:
+; CHECK-ARM64-NEXT:        bl  _func_3identical_v1
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx11.0.0"
+
+ at g_val = global i8 0, align 1
+ at g_ptr = global ptr null, align 8
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_unique_1() #0 {
+entry:
+  store volatile i8 1, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_unique_2_canmerge() local_unnamed_addr #0 {
+entry:
+  store volatile i8 2, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_2identical_v1() #0 {
+entry:
+  store volatile i8 2, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_2identical_v2() #0 {
+entry:
+  store volatile i8 2, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v1() #0 {
+entry:
+  store volatile i8 3, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v2() #0 {
+entry:
+  store volatile i8 3, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v3() #0 {
+entry:
+  store volatile i8 3, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v1_canmerge() local_unnamed_addr #0 {
+entry:
+  store volatile i8 33, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v2_canmerge() local_unnamed_addr #0 {
+entry:
+  store volatile i8 33, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_3identical_v3_canmerge() local_unnamed_addr #0 {
+entry:
+  store volatile i8 33, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync)
+define void @call_all_funcs() local_unnamed_addr #1 {
+entry:
+  tail call void @func_unique_1()
+  tail call void @func_unique_2_canmerge()
+  tail call void @func_2identical_v1()
+  tail call void @func_2identical_v2()
+  tail call void @func_3identical_v1()
+  tail call void @func_3identical_v2()
+  tail call void @func_3identical_v3()
+  tail call void @func_3identical_v1_canmerge()
+  tail call void @func_3identical_v2_canmerge()
+  tail call void @func_3identical_v3_canmerge()
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @take_func_addr() local_unnamed_addr #0 {
+entry:
+  store volatile ptr @func_unique_1, ptr @g_ptr, align 8, !tbaa !8
+  store volatile ptr @func_2identical_v1, ptr @g_ptr, align 8, !tbaa !8
+  store volatile ptr @func_2identical_v2, ptr @g_ptr, align 8, !tbaa !8
+  store volatile ptr @func_3identical_v1, ptr @g_ptr, align 8, !tbaa !8
+  store volatile ptr @func_3identical_v2, ptr @g_ptr, align 8, !tbaa !8
+  store volatile ptr @func_3identical_v3, ptr @g_ptr, align 8, !tbaa !8
+  ret void
+}
+
+attributes #0 = { mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+attributes #1 = { mustprogress nofree noinline norecurse nounwind ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 1}
+!3 = !{i32 7, !"frame-pointer", i32 1}
+!4 = !{!"clang"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C++ TBAA"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"any pointer", !6, i64 0}
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;; Generate the above LLVM IR with the below script ;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; #!/bin/bash
+; set -ex
+; TOOLCHAIN_BIN="llvm-project/build/Debug/bin"
+;
+; # Create icf-safe-thunks.cpp file
+; cat > icf-safe-thunks.cpp <<EOF
+;
+; #define ATTR __attribute__((noinline)) extern "C"
+; typedef unsigned long long ULL;
+;
+; volatile char g_val = 0;
+; void *volatile g_ptr = 0;
+;
+; ATTR void func_unique_1() {
+;     g_val = 1;
+; }
+;
+; ATTR void func_unique_2_canmerge() {
+;     g_val = 2;
+; }
+;
+; ATTR void func_2identical_v1() {
+;     g_val = 2;
+; }
+;
+; ATTR void func_2identical_v2() {
+;     g_val = 2;
+; }
+;
+; ATTR void func_3identical_v1() {
+;     g_val = 3;
+; }
+;
+; ATTR void func_3identical_v2() {
+;     g_val = 3;
+; }
+;
+; ATTR void func_3identical_v3() {
+;     g_val = 3;
+; }
+;
+; ATTR void func_3identical_v1_canmerge() {
+;     g_val = 33;
+; }
+;
+; ATTR void func_3identical_v2_canmerge() {
+;     g_val = 33;
+; }
+;
+; ATTR void func_3identical_v3_canmerge() {
+;     g_val = 33;
+; }
+;
+; ATTR void call_all_funcs() {
+;     func_unique_1();
+;     func_unique_2_canmerge();
+;     func_2identical_v1();
+;     func_2identical_v2();
+;     func_3identical_v1();
+;     func_3identical_v2();
+;     func_3identical_v3();
+;     func_3identical_v1_canmerge();
+;     func_3identical_v2_canmerge();
+;     func_3identical_v3_canmerge();
+; }
+;
+; ATTR void take_func_addr() {
+;     g_ptr = (void*)func_unique_1;
+;     g_ptr = (void*)func_2identical_v1;
+;     g_ptr = (void*)func_2identical_v2;
+;     g_ptr = (void*)func_3identical_v1;
+;     g_ptr = (void*)func_3identical_v2;
+;     g_ptr = (void*)func_3identical_v3;
+; }
+; EOF
+;
+; $TOOLCHAIN_BIN/clang -target arm64-apple-macos11.0 -S -emit-llvm \
+;                      icf-safe-thunks.cpp -O3 -o icf-safe-thunks.ll

>From 334d0966edf1866567000dfdc92eae2fede7ac66 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 29 Aug 2024 21:30:11 -0700
Subject: [PATCH 2/9] Fix "br" => "b" branch

---
 lld/MachO/Arch/ARM64.cpp          | 2 +-
 lld/test/MachO/icf-safe-thunks.ll | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 30b6c27ff99f89..e6d2bc82099a49 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -181,7 +181,7 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
 }
 // Just a single direct branch to the target function.
 static constexpr uint32_t icfSafeThunkCode[] = {
-    0x94000000, // 08: b    target
+    0x14000000, // 08: b    target
 };
 
 void ARM64::initICFSafeThunkBody(InputSection *thunk,
diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
index 2a0ca8314036f8..98a3c0b0d2865d 100644
--- a/lld/test/MachO/icf-safe-thunks.ll
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -30,11 +30,11 @@
 ; CHECK-ARM64-NEXT:        adr
 ;
 ; CHECK-ARM64:        _func_2identical_v2:
-; CHECK-ARM64-NEXT:         bl  _func_unique_2_canmerge
+; CHECK-ARM64-NEXT:        b  _func_unique_2_canmerge
 ; CHECK-ARM64-NEXT:   _func_3identical_v2:
-; CHECK-ARM64-NEXT:        bl  _func_3identical_v1
+; CHECK-ARM64-NEXT:        b  _func_3identical_v1
 ; CHECK-ARM64-NEXT:   _func_3identical_v3:
-; CHECK-ARM64-NEXT:        bl  _func_3identical_v1
+; CHECK-ARM64-NEXT:        b  _func_3identical_v1
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
 target triple = "arm64-apple-macosx11.0.0"

>From 8bc9599e95f75231fb9eda656633a68333877431 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 30 Aug 2024 19:18:53 -0700
Subject: [PATCH 3/9] Address Feedback nr.1

---
 lld/MachO/Arch/ARM64.cpp          |   6 +-
 lld/MachO/Driver.cpp              |   3 +-
 lld/MachO/ICF.cpp                 | 100 ++++++++++++++++--------------
 lld/test/MachO/icf-safe-thunks.ll |   6 +-
 4 files changed, 61 insertions(+), 54 deletions(-)

diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index e6d2bc82099a49..195a8f09f47c1a 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -42,9 +42,9 @@ struct ARM64 : ARM64Common {
   void populateThunk(InputSection *thunk, Symbol *funcSym) override;
   void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
 
-  virtual void initICFSafeThunkBody(InputSection *thunk,
-                                    InputSection *branchTarget) const override;
-  virtual uint32_t getICFSafeThunkSize() const override;
+  void initICFSafeThunkBody(InputSection *thunk,
+                            InputSection *branchTarget) const override;
+  uint32_t getICFSafeThunkSize() const override;
 };
 
 } // namespace
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 7b23fdfea1303d..babbce2a81cd7d 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -851,8 +851,7 @@ static ICFLevel getICFLevel(const ArgList &args) {
                       .Case("all", ICFLevel::all)
                       .Default(ICFLevel::unknown);
 
-  if (icfLevel == ICFLevel::safe_thunks &&
-      !is_contained({AK_x86_64h, AK_arm64}, config->arch())) {
+  if ((icfLevel == ICFLevel::safe_thunks) && (config->arch() != AK_arm64)) {
     error("--icf=safe_thunks is only supported on arm64 targets");
   }
 
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index cac5e673986829..18efdfcfffb153 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -252,59 +252,69 @@ void ICF::forEachClassRange(size_t begin, size_t end,
   }
 }
 
-// Given a range of identical icfInputs's, replace address significant functions
+// Given a range of identical icfInputs, replace address significant functions
 // with a thunk that is just a direct branch to the first function in the
-// series. This way we end up we keep only one main body of the function but we
-// still retain address uniqueness of rellevant functions by having them be a
-// direct branch thunk rather than contain a full copy of the actual function
+// series. This way we keep only one main body of the function but we still
+// retain the address uniqueness of relevant functions by having them be a
+// direct branch thunk rather than containing a full copy of the actual function
 // body.
 void ICF::applySafeThunksToRange(size_t begin, size_t end) {
-  // If we need to create a unique ICF thunk, use the first section as the
-  // section that all thunks will branch to.
-  ConcatInputSection *masterIsec = icfInputs[begin];
+  // If the functions we're dealing with are smaller than the thunk size, then
+  // just leave them all as-is - creating thunks would be a net loss.
   uint32_t thunkSize = target->getICFSafeThunkSize();
-  static std::mutex thunkInsertionMutex;
+  if (icfInputs[begin]->data.size() <= thunkSize)
+    return;
+
+  // The standard ICF algorithm will merge all functions in the [begin + 1, end)
+  // range into icfInputs[begin].So, the body of the first function is always
+  // kept, even if it is not keepUnique. To make safe_thunks nicely play with
+  // this behavior, we ensure that the first function in the range is keepUnique.
+  if (!icfInputs[begin]->keepUnique) {
+    bool haveKeepUnique = false;
+    for (size_t i = begin + 1; i < end; ++i) {
+      if (icfInputs[i]->keepUnique) {
+        std::swap(icfInputs[begin], icfInputs[i]);
+        haveKeepUnique = true;
+        break;
+      }
+    }
+    // If we don't have keepUnique funcs, we can just return
+    if (!haveKeepUnique)
+      return;
+  }
 
-  uint32_t keepUniqueCount = masterIsec->keepUnique ? 1 : 0;
+  // When creating a unique ICF thunk, use the first section as the section that
+  // all thunks will branch to.
+  ConcatInputSection *masterIsec = icfInputs[begin];
+
+  static std::mutex thunkInsertionMutex;
   for (size_t i = begin + 1; i < end; ++i) {
     ConcatInputSection *isec = icfInputs[i];
-    if (isec->keepUnique)
-      ++keepUniqueCount;
-
-    // We create thunks for the 2nd, 3rd, ... keepUnique sections. The first
-    // keepUnique section we leave as is - as it will not end up sharing an
-    // address with any other keepUnique section.
-    if (keepUniqueCount >= 2 && isec->keepUnique) {
-      // If the target to be folded is smaller than the thunk size, then just
-      // leave it as-is - creating the thunk would be a net loss.
-      if (isec->data.size() <= thunkSize)
-        return;
-
-      // applySafeThunksToRange is called from multiple threads, but
-      // `makeSyntheticInputSection` and `addInputSection` are not thread safe.
-      // So we need to guard them with a mutex.
-      ConcatInputSection *thunk;
-      {
-        std::lock_guard<std::mutex> lock(thunkInsertionMutex);
-        thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
-        addInputSection(thunk);
-      }
+    if (!isec->keepUnique)
+      continue;
 
-      target->initICFSafeThunkBody(thunk, masterIsec);
-      thunk->foldIdentical(isec);
-
-      // Since we're folding the target function into a thunk, we need to adjust
-      // the symbols that now got relocated from the target function to the
-      // thunk.
-      // Since the thunk is only one branch, we move all symbols to offset 0 and
-      // make sure that the size of all non-zero-size symbols is equal to the
-      // size of the branch.
-      for (auto *sym : isec->symbols) {
-        if (sym->value != 0)
-          sym->value = 0;
-        if (sym->size != 0)
-          sym->size = thunkSize;
-      }
+    // applySafeThunksToRange is called from multiple threads, but
+    // `makeSyntheticInputSection` and `addInputSection` are not thread safe. So
+    // we need to guard them with a mutex.
+    ConcatInputSection *thunk;
+    {
+      std::lock_guard<std::mutex> lock(thunkInsertionMutex);
+      thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
+      addInputSection(thunk);
+    }
+
+    target->initICFSafeThunkBody(thunk, masterIsec);
+    thunk->foldIdentical(isec);
+
+    // Since we're folding the target function into a thunk, we need to adjust
+    // the symbols that now got relocated from the target function to the thunk.
+    // Since the thunk is only one branch, we move all symbols to offset 0 and
+    // make sure that the size of all non-zero-size symbols is equal to the size
+    // of the branch.
+    for (auto *sym : isec->symbols) {
+      sym->value = 0;
+      if (sym->size != 0)
+        sym->size = thunkSize;
     }
   }
 }
diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
index 98a3c0b0d2865d..23dd271d6d75a3 100644
--- a/lld/test/MachO/icf-safe-thunks.ll
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -10,9 +10,7 @@
 ; CHECK-ARM64-NEXT:        mov {{.*}}, #0x1
 ;
 ; CHECK-ARM64:        _func_unique_2_canmerge:
-; CHECK-ARM64-NEXT:        mov {{.*}}, #0x2
-;
-; CHECK-ARM64:        _func_2identical_v1:
+; CHECK-ARM64-NEXT:   _func_2identical_v1:
 ; CHECK-ARM64-NEXT:        mov {{.*}}, #0x2
 ;
 ; CHECK-ARM64:        _func_3identical_v1:
@@ -30,7 +28,7 @@
 ; CHECK-ARM64-NEXT:        adr
 ;
 ; CHECK-ARM64:        _func_2identical_v2:
-; CHECK-ARM64-NEXT:        b  _func_unique_2_canmerge
+; CHECK-ARM64-NEXT:        b  _func_2identical_v1
 ; CHECK-ARM64-NEXT:   _func_3identical_v2:
 ; CHECK-ARM64-NEXT:        b  _func_3identical_v1
 ; CHECK-ARM64-NEXT:   _func_3identical_v3:

>From 4c69771b0144965f799dd3fb299512e9faddc237 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 30 Aug 2024 19:24:46 -0700
Subject: [PATCH 4/9] Fix Formatting

---
 lld/MachO/ICF.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 18efdfcfffb153..f454885a51a4aa 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -268,7 +268,8 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
   // The standard ICF algorithm will merge all functions in the [begin + 1, end)
   // range into icfInputs[begin].So, the body of the first function is always
   // kept, even if it is not keepUnique. To make safe_thunks nicely play with
-  // this behavior, we ensure that the first function in the range is keepUnique.
+  // this behavior, we ensure that the first function in the range is
+  // keepUnique.
   if (!icfInputs[begin]->keepUnique) {
     bool haveKeepUnique = false;
     for (size_t i = begin + 1; i < end; ++i) {

>From d48c15758f43268782d3a446a07fcd77fcd9adfb Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 08:20:30 -0700
Subject: [PATCH 5/9] Fix map file, remove thread locking

---
 lld/MachO/ICF.cpp                 | 37 ++++++++++++++++++-------------
 lld/MachO/InputSection.cpp        |  5 +++--
 lld/MachO/InputSection.h          |  4 +++-
 lld/MachO/MapFile.cpp             |  2 +-
 lld/MachO/Symbols.cpp             |  2 +-
 lld/MachO/Symbols.h               |  9 ++++++--
 lld/MachO/SyntheticSections.cpp   |  3 ++-
 lld/test/MachO/icf-safe-thunks.ll | 17 +++++++++++++-
 8 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index f454885a51a4aa..bea2318a69c87e 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -266,7 +266,7 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
     return;
 
   // The standard ICF algorithm will merge all functions in the [begin + 1, end)
-  // range into icfInputs[begin].So, the body of the first function is always
+  // range into icfInputs[begin], so the body of the first function is always
   // kept, even if it is not keepUnique. To make safe_thunks nicely play with
   // this behavior, we ensure that the first function in the range is
   // keepUnique.
@@ -288,31 +288,26 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
   // all thunks will branch to.
   ConcatInputSection *masterIsec = icfInputs[begin];
 
-  static std::mutex thunkInsertionMutex;
   for (size_t i = begin + 1; i < end; ++i) {
     ConcatInputSection *isec = icfInputs[i];
+    // When we're done processing keepUnique entries, we can stop. Sorting
+    // guaratees that all keepUnique will be at the front.
     if (!isec->keepUnique)
-      continue;
+      break;
 
-    // applySafeThunksToRange is called from multiple threads, but
-    // `makeSyntheticInputSection` and `addInputSection` are not thread safe. So
-    // we need to guard them with a mutex.
     ConcatInputSection *thunk;
-    {
-      std::lock_guard<std::mutex> lock(thunkInsertionMutex);
-      thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
-      addInputSection(thunk);
-    }
+    thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
+    addInputSection(thunk);
 
     target->initICFSafeThunkBody(thunk, masterIsec);
-    thunk->foldIdentical(isec);
+    thunk->foldIdentical(isec, Symbol::ICFFoldKind::Folded_Thunk);
 
     // Since we're folding the target function into a thunk, we need to adjust
     // the symbols that now got relocated from the target function to the thunk.
     // Since the thunk is only one branch, we move all symbols to offset 0 and
     // make sure that the size of all non-zero-size symbols is equal to the size
     // of the branch.
-    for (auto *sym : isec->symbols) {
+    for (auto *sym : thunk->symbols) {
       sym->value = 0;
       if (sym->size != 0)
         sym->size = thunkSize;
@@ -381,6 +376,12 @@ void ICF::run() {
 
   llvm::stable_sort(
       icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
+        // When using safe_thunks, ensure that we first sort by icfEqClass and
+        // then by keepUnique (descending). This guarantees that within an
+        // equivalence class, the keepUnique inputs are always first.
+        if (config->icfLevel == ICFLevel::safe_thunks)
+          if (a->icfEqClass[0] == b->icfEqClass[0])
+            return a->keepUnique > b->keepUnique;
         return a->icfEqClass[0] < b->icfEqClass[0];
       });
   forEachClass([&](size_t begin, size_t end) {
@@ -400,6 +401,14 @@ void ICF::run() {
     log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
   }
 
+  // When using safe_thunks, we need to create thunks for all keepUnique
+  // functions that can be deduplicated. Since we're creating / adding new
+  // InputSections, we can't paralellize this.
+  if (config->icfLevel == ICFLevel::safe_thunks)
+    forEachClassRange(0, icfInputs.size(), [&](size_t begin, size_t end) {
+      applySafeThunksToRange(begin, end);
+    });
+
   // Fold sections within equivalence classes
   forEachClass([&](size_t begin, size_t end) {
     if (end - begin < 2)
@@ -408,8 +417,6 @@ void ICF::run() {
 
     // For ICF level safe_thunks, replace keepUnique function bodies with
     // thunks. For all other ICF levles, directly merge the functions.
-    if (useSafeThunks)
-      applySafeThunksToRange(begin, end);
 
     ConcatInputSection *beginIsec = icfInputs[begin];
     for (size_t i = begin + 1; i < end; ++i) {
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index a9b93e07a60133..64c584920defba 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -190,13 +190,14 @@ const Reloc *InputSection::getRelocAt(uint32_t off) const {
   return &*it;
 }
 
-void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
+void ConcatInputSection::foldIdentical(ConcatInputSection *copy,
+                                       Symbol::ICFFoldKind foldKind) {
   align = std::max(align, copy->align);
   copy->live = false;
   copy->wasCoalesced = true;
   copy->replacement = this;
   for (auto &copySym : copy->symbols)
-    copySym->wasIdenticalCodeFolded = true;
+    copySym->identicalCodeFoldingKind = foldKind;
 
   symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
   copy->symbols.clear();
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 0f389e50425a32..f289864a75ca08 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -117,7 +117,9 @@ class ConcatInputSection final : public InputSection {
   bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
   void writeTo(uint8_t *buf);
 
-  void foldIdentical(ConcatInputSection *redundant);
+  void foldIdentical(
+      ConcatInputSection *redundant,
+      Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Folded_Body);
   ConcatInputSection *canonical() override {
     return replacement ? replacement : this;
   }
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 5bcaeca48da2a2..20dbd1ed9d033e 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -156,7 +156,7 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
 }
 
 static uint64_t getSymSizeForMap(Defined *sym) {
-  if (sym->wasIdenticalCodeFolded)
+  if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Folded_Body)
     return 0;
   return sym->size;
 }
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 7bac78a59e4fec..910d2393eaf98d 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -60,7 +60,7 @@ Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
                  bool interposable)
     : Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef),
       privateExtern(isPrivateExtern), includeInSymtab(includeInSymtab),
-      wasIdenticalCodeFolded(false),
+      identicalCodeFoldingKind(ICFFoldKind::None),
       referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
       interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden),
       weakDef(isWeakDef), external(isExternal), originalIsec(isec),
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index be5d135b8cd54f..70ef59557a4038 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -41,6 +41,11 @@ class Symbol {
     AliasKind,
   };
 
+  // Enum that describes the kind of ICF folding that a symbol is involved in.
+  // We need to keep track of this to correctly display symbol sizes in the map
+  // file.
+  enum ICFFoldKind { None, Folded_Body, Folded_Thunk };
+
   virtual ~Symbol() {}
 
   Kind kind() const { return symbolKind; }
@@ -154,8 +159,8 @@ class Defined : public Symbol {
   bool privateExtern : 1;
   // Whether this symbol should appear in the output symbol table.
   bool includeInSymtab : 1;
-  // Whether this symbol was folded into a different symbol during ICF.
-  bool wasIdenticalCodeFolded : 1;
+  // The ICF folding kind of this symbol: None / Folded / Folded_Thunk.
+  ICFFoldKind identicalCodeFoldingKind : 2;
   // Symbols marked referencedDynamically won't be removed from the output's
   // symbol table by tools like strip. In theory, this could be set on arbitrary
   // symbols in input object files. In practice, it's used solely for the
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 939e9b286d77f5..cfae3f82bb25a8 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1231,7 +1231,8 @@ void SymtabSection::emitStabs() {
 
       // Constant-folded symbols go in the executable's symbol table, but don't
       // get a stabs entry unless --keep-icf-stabs flag is specified
-      if (!config->keepICFStabs && defined->wasIdenticalCodeFolded)
+      if (!config->keepICFStabs &&
+          defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Folded_Body)
         continue;
 
       ObjFile *file = defined->getObjectFile();
diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
index 23dd271d6d75a3..238e90f952e160 100644
--- a/lld/test/MachO/icf-safe-thunks.ll
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -2,8 +2,9 @@
 
 ; RUN: rm -rf %t; mkdir %t
 ; RUN: llc -filetype=obj %s -O3 -o %t/icf-obj-safe-thunks.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
-; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/icf-safe.dylib %t/icf-obj-safe-thunks.o
+; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/icf-safe.dylib -map %t/icf-safe.map %t/icf-obj-safe-thunks.o
 ; RUN: llvm-objdump %t/icf-safe.dylib -d --macho | FileCheck %s --check-prefixes=CHECK-ARM64
+; RUN: cat %t/icf-safe.map | FileCheck %s --check-prefixes=CHECK-ARM64-MAP
 
 ; CHECK-ARM64:        (__TEXT,__text) section
 ; CHECK-ARM64-NEXT:   _func_unique_1:
@@ -34,6 +35,20 @@
 ; CHECK-ARM64-NEXT:   _func_3identical_v3:
 ; CHECK-ARM64-NEXT:        b  _func_3identical_v1
 
+
+; CHECK-ARM64-MAP:      0x00000010 [  2] _func_unique_1
+; CHECK-ARM64-MAP-NEXT: 0x00000010 [  2] _func_2identical_v1
+; CHECK-ARM64-MAP-NEXT: 0x00000000 [  2] _func_unique_2_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000010 [  2] _func_3identical_v1
+; CHECK-ARM64-MAP-NEXT: 0x00000010 [  2] _func_3identical_v1_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000000 [  2] _func_3identical_v2_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000000 [  2] _func_3identical_v3_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000034 [  2] _call_all_funcs
+; CHECK-ARM64-MAP-NEXT: 0x00000050 [  2] _take_func_addr
+; CHECK-ARM64-MAP-NEXT: 0x00000004 [  2] _func_2identical_v2
+; CHECK-ARM64-MAP-NEXT: 0x00000004 [  2] _func_3identical_v2
+; CHECK-ARM64-MAP-NEXT: 0x00000004 [  2] _func_3identical_v3
+
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
 target triple = "arm64-apple-macosx11.0.0"
 

>From fe7d0262a1dd500195a13c75a0a7cf038c8a6f31 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 08:51:49 -0700
Subject: [PATCH 6/9] Add assert

---
 lld/MachO/ICF.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index bea2318a69c87e..637affd82f080f 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -421,8 +421,14 @@ void ICF::run() {
     ConcatInputSection *beginIsec = icfInputs[begin];
     for (size_t i = begin + 1; i < end; ++i) {
       // When using safe_thunks, keepUnique inputs are already handeled above
-      if (useSafeThunks && icfInputs[i]->keepUnique)
+      if (useSafeThunks && icfInputs[i]->keepUnique) {
+        // Assert that all keepUnique input sections have been replaced with
+        // thunks
+        assert(!icfInputs[i]->live);
+        assert(icfInputs[i]->replacement->data.size() ==
+               target->getICFSafeThunkSize());
         continue;
+      }
       beginIsec->foldIdentical(icfInputs[i]);
     }
   });

>From 9f1a42f357c3cfab10654309bf47610b8dfeaa07 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 10:55:39 -0700
Subject: [PATCH 7/9] Fix assert

---
 lld/MachO/ICF.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 637affd82f080f..f6afd48d294159 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -420,13 +420,14 @@ void ICF::run() {
 
     ConcatInputSection *beginIsec = icfInputs[begin];
     for (size_t i = begin + 1; i < end; ++i) {
-      // When using safe_thunks, keepUnique inputs are already handeled above
+      // Skip keepUnique inputs when using safe_thunks (already handeled above)
       if (useSafeThunks && icfInputs[i]->keepUnique) {
-        // Assert that all keepUnique input sections have been replaced with
-        // thunks
-        assert(!icfInputs[i]->live);
-        assert(icfInputs[i]->replacement->data.size() ==
-               target->getICFSafeThunkSize());
+        // Assert keepUnique sections are either small or replaced with thunks.
+        assert(!icfInputs[i]->live ||
+               icfInputs[i]->data.size() <= target->getICFSafeThunkSize());
+        assert(!icfInputs[i]->replacement ||
+               icfInputs[i]->replacement->data.size() ==
+                   target->getICFSafeThunkSize());
         continue;
       }
       beginIsec->foldIdentical(icfInputs[i]);

>From 5ed74eee454f3ea24d78488e3c121a1322a707b6 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 12:49:07 -0700
Subject: [PATCH 8/9] Address feedback

---
 lld/MachO/ICF.cpp               | 25 +++----------------------
 lld/MachO/InputSection.h        |  2 +-
 lld/MachO/MapFile.cpp           |  2 +-
 lld/MachO/Symbols.h             | 14 +++++++++-----
 lld/MachO/SyntheticSections.cpp |  2 +-
 5 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index f6afd48d294159..2ff962b06e3679 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -265,25 +265,6 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
   if (icfInputs[begin]->data.size() <= thunkSize)
     return;
 
-  // The standard ICF algorithm will merge all functions in the [begin + 1, end)
-  // range into icfInputs[begin], so the body of the first function is always
-  // kept, even if it is not keepUnique. To make safe_thunks nicely play with
-  // this behavior, we ensure that the first function in the range is
-  // keepUnique.
-  if (!icfInputs[begin]->keepUnique) {
-    bool haveKeepUnique = false;
-    for (size_t i = begin + 1; i < end; ++i) {
-      if (icfInputs[i]->keepUnique) {
-        std::swap(icfInputs[begin], icfInputs[i]);
-        haveKeepUnique = true;
-        break;
-      }
-    }
-    // If we don't have keepUnique funcs, we can just return
-    if (!haveKeepUnique)
-      return;
-  }
-
   // When creating a unique ICF thunk, use the first section as the section that
   // all thunks will branch to.
   ConcatInputSection *masterIsec = icfInputs[begin];
@@ -295,12 +276,12 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
     if (!isec->keepUnique)
       break;
 
-    ConcatInputSection *thunk;
-    thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
+    ConcatInputSection *thunk =
+        makeSyntheticInputSection(isec->getSegName(), isec->getName());
     addInputSection(thunk);
 
     target->initICFSafeThunkBody(thunk, masterIsec);
-    thunk->foldIdentical(isec, Symbol::ICFFoldKind::Folded_Thunk);
+    thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);
 
     // Since we're folding the target function into a thunk, we need to adjust
     // the symbols that now got relocated from the target function to the thunk.
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index f289864a75ca08..0785d5d90f1084 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -119,7 +119,7 @@ class ConcatInputSection final : public InputSection {
 
   void foldIdentical(
       ConcatInputSection *redundant,
-      Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Folded_Body);
+      Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
   ConcatInputSection *canonical() override {
     return replacement ? replacement : this;
   }
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 20dbd1ed9d033e..9c0621622ae2f0 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -156,7 +156,7 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
 }
 
 static uint64_t getSymSizeForMap(Defined *sym) {
-  if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Folded_Body)
+  if (sym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
     return 0;
   return sym->size;
 }
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 70ef59557a4038..e355e31bb36d6b 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -41,10 +41,14 @@ class Symbol {
     AliasKind,
   };
 
-  // Enum that describes the kind of ICF folding that a symbol is involved in.
-  // We need to keep track of this to correctly display symbol sizes in the map
-  // file.
-  enum ICFFoldKind { None, Folded_Body, Folded_Thunk };
+  // Enum that describes the type of Identical Code Folding (ICF) applied to a
+  // symbol. This information is crucial for accurately representing symbol
+  // sizes in the map file.
+  enum ICFFoldKind {
+    None, // No folding is applied.
+    Body, // The entire body (function or data) is folded.
+    Thunk // The function body is folded into a single branch thunk.
+  };
 
   virtual ~Symbol() {}
 
@@ -159,7 +163,7 @@ class Defined : public Symbol {
   bool privateExtern : 1;
   // Whether this symbol should appear in the output symbol table.
   bool includeInSymtab : 1;
-  // The ICF folding kind of this symbol: None / Folded / Folded_Thunk.
+  // The ICF folding kind of this symbol: None / Body / Thunk.
   ICFFoldKind identicalCodeFoldingKind : 2;
   // Symbols marked referencedDynamically won't be removed from the output's
   // symbol table by tools like strip. In theory, this could be set on arbitrary
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index cfae3f82bb25a8..7b0078856c5e22 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1232,7 +1232,7 @@ void SymtabSection::emitStabs() {
       // Constant-folded symbols go in the executable's symbol table, but don't
       // get a stabs entry unless --keep-icf-stabs flag is specified
       if (!config->keepICFStabs &&
-          defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Folded_Body)
+          defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
         continue;
 
       ObjFile *file = defined->getObjectFile();

>From 88d7983964385718d69213af22bc305cf14d33c6 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 5 Sep 2024 13:12:48 -0700
Subject: [PATCH 9/9] Fix formatting

---
 lld/MachO/InputSection.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 0785d5d90f1084..4e238d8ef77793 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -117,9 +117,8 @@ class ConcatInputSection final : public InputSection {
   bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
   void writeTo(uint8_t *buf);
 
-  void foldIdentical(
-      ConcatInputSection *redundant,
-      Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
+  void foldIdentical(ConcatInputSection *redundant,
+                     Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
   ConcatInputSection *canonical() override {
     return replacement ? replacement : this;
   }



More information about the llvm-commits mailing list