[lld] 97a4324 - [lld-macho] Fix ICF differentiation of safe_thunks relocs (#111811)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 10 08:22:52 PDT 2024


Author: alx32
Date: 2024-10-10T08:22:48-07:00
New Revision: 97a43242246bf4a55e68bddf3e6a0500c07803cc

URL: https://github.com/llvm/llvm-project/commit/97a43242246bf4a55e68bddf3e6a0500c07803cc
DIFF: https://github.com/llvm/llvm-project/commit/97a43242246bf4a55e68bddf3e6a0500c07803cc.diff

LOG: [lld-macho] Fix ICF differentiation of safe_thunks relocs (#111811)

In `--icf=safe_thunks` mode, the linker differentiates `keepUnique`
functions by creating thunks during a post-processing step after
Identical Code Folding (ICF). While this ensures that `keepUnique`
functions themselves are not incorrectly merged, it overlooks functions
that reference these `keepUnique` symbols.

If two functions are identical except for references to different
`keepUnique` functions, the current ICF algorithm incorrectly considers
them identical because it doesn't account for the future differentiation
introduced by thunks. This leads to incorrect deduplication of functions
that should remain distinct.

To address this issue, we modify the ICF comparison to explicitly check
for references to `keepUnique` functions during deduplication. By doing
so, functions that reference different `keepUnique` symbols are
correctly identified as distinct, preventing erroneous merging and
ensuring the correctness of the linked output.

Added: 
    

Modified: 
    lld/MachO/ICF.cpp
    lld/test/MachO/icf-safe-thunks.ll

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 2ff962b06e3679..aedaecfdeb2c01 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -147,6 +147,17 @@ bool ICF::equalsConstant(const ConcatInputSection *ia,
       isecB = rb.referent.get<InputSection *>();
     }
 
+    // Typically, we should not encounter sections marked with `keepUnique` at
+    // this point as they would have resulted in 
diff erent hashes and therefore
+    // no need for a full comparison.
+    // However, in `safe_thunks` mode, it's possible for two 
diff erent
+    // relocations to reference identical `keepUnique` functions that will be
+    // distinguished later via thunks - so we need to handle this case
+    // explicitly.
+    if ((isecA != isecB) && ((isecA->keepUnique && isCodeSection(isecA)) ||
+                             (isecB->keepUnique && isCodeSection(isecB))))
+      return false;
+
     if (isecA->parent != isecB->parent)
       return false;
     // Sections with identical parents should be of the same kind.

diff  --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll
index 238e90f952e160..95e00a5b98385b 100644
--- a/lld/test/MachO/icf-safe-thunks.ll
+++ b/lld/test/MachO/icf-safe-thunks.ll
@@ -22,6 +22,13 @@
 ; CHECK-ARM64-NEXT:   _func_3identical_v3_canmerge:
 ; CHECK-ARM64-NEXT:        mov {{.*}}, #0x21
 ;
+; CHECK-ARM64:        _func_call_thunked_1_nomerge:
+; CHECK-ARM64-NEXT:        stp	x29
+;
+; CHECK-ARM64:        _func_call_thunked_2_nomerge:
+; CHECK-ARM64-NEXT:   _func_call_thunked_2_merge:
+; CHECK-ARM64-NEXT:        stp	x29
+;
 ; CHECK-ARM64:        _call_all_funcs:
 ; CHECK-ARM64-NEXT:        stp  x29
 ;
@@ -43,6 +50,9 @@
 ; CHECK-ARM64-MAP-NEXT: 0x00000010 [  2] _func_3identical_v1_canmerge
 ; CHECK-ARM64-MAP-NEXT: 0x00000000 [  2] _func_3identical_v2_canmerge
 ; CHECK-ARM64-MAP-NEXT: 0x00000000 [  2] _func_3identical_v3_canmerge
+; CHECK-ARM64-MAP-NEXT: 0x00000020 [  2] _func_call_thunked_1_nomerge
+; CHECK-ARM64-MAP-NEXT: 0x00000020 [  2] _func_call_thunked_2_nomerge
+; CHECK-ARM64-MAP-NEXT: 0x00000000 [  2] _func_call_thunked_2_merge
 ; CHECK-ARM64-MAP-NEXT: 0x00000034 [  2] _call_all_funcs
 ; CHECK-ARM64-MAP-NEXT: 0x00000050 [  2] _take_func_addr
 ; CHECK-ARM64-MAP-NEXT: 0x00000004 [  2] _func_2identical_v2
@@ -125,6 +135,30 @@ entry:
   ret void
 }
 
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_call_thunked_1_nomerge() local_unnamed_addr #0 {
+entry:
+  tail call void @func_2identical_v1()
+  store volatile i8 77, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_call_thunked_2_nomerge() local_unnamed_addr #0 {
+entry:
+  tail call void @func_2identical_v2()
+  store volatile i8 77, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp memory(readwrite, argmem: none) uwtable(sync)
+define void @func_call_thunked_2_merge() local_unnamed_addr #0 {
+entry:
+  tail call void @func_2identical_v2()
+  store volatile i8 77, ptr @g_val, align 1, !tbaa !5
+  ret void
+}
+
 ; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync)
 define void @call_all_funcs() local_unnamed_addr #1 {
 entry:
@@ -227,6 +261,21 @@ attributes #1 = { mustprogress nofree noinline norecurse nounwind ssp uwtable(sy
 ;     g_val = 33;
 ; }
 ;
+; ATTR void func_call_thunked_1_nomerge() {
+;     func_2identical_v1();
+;     g_val = 77;
+; }
+;
+; ATTR void func_call_thunked_2_nomerge() {
+;     func_2identical_v2();
+;     g_val = 77;
+; }
+;
+; ATTR void func_call_thunked_2_merge() {
+;     func_2identical_v2();
+;     g_val = 77;
+; }
+;
 ; ATTR void call_all_funcs() {
 ;     func_unique_1();
 ;     func_unique_2_canmerge();


        


More information about the llvm-commits mailing list