[lld] [lld-macho] Fix STABS entries for `--icf=safe_thunks` and `--keep-icf-stabs` (PR #133179)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 16:25:12 PDT 2025
https://github.com/alx32 created https://github.com/llvm/llvm-project/pull/133179
When using the linker flags `--icf=safe_thunks` and `--keep-icf-stabs` together, an issue arises with the STABS debugging entries in the linked output. The problem affects STABS entries for functions that are folded via ICF using thunks.
For instance, if `func1` is merged into `func2` through a thunk, the STABS entry for `func1` incorrectly points to the object file of `func2`. This is incorrect behavior—each function’s STABS entry should consistently point to its own original object file (e.g., the STABS entry for `func1` should reference `func1`’s object file). This issue causes `dsymutil` to not be able to retrieve the debug information for the problematic function.
This patch corrects this behavior - making it so that STABS entries always point to the correct object file.
>From 373fed2debcc6997f945496dc46a988131bae15e Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Wed, 26 Mar 2025 16:16:59 -0700
Subject: [PATCH] [lld-macho] Fix STABS entries for thunk ICF
---
lld/MachO/SyntheticSections.cpp | 53 +++++----
lld/test/MachO/icf-safe-thunks-dwarf.ll | 140 +++++++++++++++++++++++-
2 files changed, 168 insertions(+), 25 deletions(-)
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 60b57bb3a192c..dfacaf2ef4e0d 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1205,18 +1205,6 @@ void SymtabSection::emitEndFunStab(Defined *defined) {
stabs.emplace_back(std::move(stab));
}
-// Given a pointer to a function symbol, return the symbol that points to the
-// actual function body that will go in the final binary. Generally this is the
-// symbol itself, but if the symbol was folded using a thunk, we retrieve the
-// target function body from the thunk.
-Defined *SymtabSection::getFuncBodySym(Defined *originalSym) {
- if (originalSym->identicalCodeFoldingKind == Symbol::ICFFoldKind::None ||
- originalSym->identicalCodeFoldingKind == Symbol::ICFFoldKind::Body)
- return originalSym;
-
- return macho::getBodyForThunkFoldedSym(originalSym);
-}
-
void SymtabSection::emitStabs() {
if (config->omitDebugInfo)
return;
@@ -1252,10 +1240,11 @@ void SymtabSection::emitStabs() {
if (!file || !file->compileUnit)
continue;
- // We use 'originalIsec' to get the file id of the symbol since 'isec()'
- // might point to the merged ICF symbol's file
- symbolsNeedingStabs.emplace_back(
- defined, getFuncBodySym(defined)->originalIsec->getFile()->id);
+ // We use the symbol's original InputSection to get the file id,
+ // even for ICF folded symbols, to ensure STABS entries point to the
+ // correct object file where the symbol was originally defined
+ symbolsNeedingStabs.emplace_back(defined,
+ defined->originalIsec->getFile()->id);
}
}
@@ -1270,10 +1259,12 @@ void SymtabSection::emitStabs() {
InputFile *lastFile = nullptr;
for (SortingPair &pair : symbolsNeedingStabs) {
Defined *defined = pair.first;
- // We use 'originalIsec' of the symbol since we care about the actual origin
- // of the symbol, not the canonical location returned by `isec()`.
- Defined *funcBodySym = getFuncBodySym(defined);
- InputSection *isec = funcBodySym->originalIsec;
+ // When emitting STABS entries for a symbol, always use the original
+ // InputSection of the defined symbol, not the section of the function body
+ // (which might be a different function entirely if ICF folded this
+ // function). This ensures STABS entries point back to the original object
+ // file.
+ InputSection *isec = defined->originalIsec;
ObjFile *file = cast<ObjFile>(isec->getFile());
if (lastFile == nullptr || lastFile != file) {
@@ -1288,12 +1279,30 @@ void SymtabSection::emitStabs() {
StabsEntry symStab;
symStab.sect = isec->parent->index;
symStab.strx = stringTableSection.addString(defined->getName());
- symStab.value = funcBodySym->getVA();
+
+ // When using --keep-icf-stabs, we need to use the VA of the actual function
+ // body that the linker will place in the binary. This is the function that
+ // the symbol refers to after ICF folding.
+ if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
+ // For thunks, we need to get the function they point to
+ Defined *target = getBodyForThunkFoldedSym(defined);
+ symStab.value = target->getVA();
+ } else {
+ symStab.value = defined->getVA();
+ }
if (isCodeSection(isec)) {
symStab.type = N_FUN;
stabs.emplace_back(std::move(symStab));
- emitEndFunStab(funcBodySym);
+ // For the end function marker in STABS, we need to use the size of the
+ // actual function body that exists in the output binary
+ if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
+ // For thunks, we use the target's size
+ Defined *target = getBodyForThunkFoldedSym(defined);
+ emitEndFunStab(target);
+ } else {
+ emitEndFunStab(defined);
+ }
} else {
symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
stabs.emplace_back(std::move(symStab));
diff --git a/lld/test/MachO/icf-safe-thunks-dwarf.ll b/lld/test/MachO/icf-safe-thunks-dwarf.ll
index 9edad60946837..3b2ee89e20976 100644
--- a/lld/test/MachO/icf-safe-thunks-dwarf.ll
+++ b/lld/test/MachO/icf-safe-thunks-dwarf.ll
@@ -3,6 +3,7 @@
; RUN: rm -rf %t && split-file %s %t
+; Test single object file case
; RUN: llc -filetype=obj %t/a.ll -O3 -o %t/a.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -o %t/a.dylib %t/a.o
@@ -26,6 +27,44 @@
; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/a_thunks.dylib %t/a.o
; RUN: dsymutil -s %t/a_thunks.dylib > %t/a_thunks.txt
+;;;;;;;;;;;;;;;;;;;;;;;;;;;; Test multiple object files with identical functions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; RUN: llc -filetype=obj %t/b.ll -O3 -o %t/b.o -enable-machine-outliner=never -mtriple arm64-apple-macos -addrsig
+; RUN: %lld -arch arm64 -lSystem --icf=safe_thunks --keep-icf-stabs -dylib -o %t/multi_thunks.dylib %t/a.o %t/b.o
+; RUN: dsymutil -s %t/multi_thunks.dylib | FileCheck %s --check-prefix=VERIFY-MULTI-STABS
+
+# Check that STABS entries correctly associate functions with their originating object files
+# VERIFY-MULTI-STABS-LABEL: Symbol table for: '{{.*}}/multi_thunks.dylib'
+
+# First object file's source and object file entries
+# VERIFY-MULTI-STABS: N_SO{{.*}}a.cpp
+# VERIFY-MULTI-STABS: N_OSO{{.*}}a.o
+
+# Functions from the first object file - all functions share the same address but belong to a.o
+# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR:[0-9a-f]+]] '_func_A'
+# VERIFY-MULTI-STABS: N_FUN{{.*}}00 0000 {{.*}}
+# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR]] '_func_B'
+# VERIFY-MULTI-STABS: N_FUN{{.*}}00 0000 {{.*}}
+# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR]] '_func_C'
+# VERIFY-MULTI-STABS: N_FUN{{.*}}00 0000 {{.*}}
+# VERIFY-MULTI-STABS: N_FUN{{.*}}[0-9a-f]+ '_take_func_addr'
+
+# End of first object file's entries
+# VERIFY-MULTI-STABS: N_SO{{.*}}01 0000 0000000000000000
+
+# Second object file's source and object file entries
+# VERIFY-MULTI-STABS: N_SO{{.*}}b.cpp
+# VERIFY-MULTI-STABS: N_OSO{{.*}}b.o
+
+# Functions from the second object file - same addresses but different object file
+# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR]] '_func_D'
+# VERIFY-MULTI-STABS: N_FUN{{.*}}00 0000 {{.*}}
+# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR]] '_func_E'
+# VERIFY-MULTI-STABS: N_FUN{{.*}}00 0000 {{.*}}
+# VERIFY-MULTI-STABS: N_FUN{{.*}}[[FUNC_ADDR]] '_func_F'
+# VERIFY-MULTI-STABS: N_FUN{{.*}}00 0000 {{.*}}
+# VERIFY-MULTI-STABS: N_FUN{{.*}}[0-9a-f]+ '_take_func_addr_b'
+
; RUN: dsymutil --flat --verify-dwarf=none %t/a_thunks.dylib -o %t/a_thunks.dSYM
; RUN: dsymutil -s %t/a_thunks.dSYM >> %t/a_thunks.txt
@@ -73,6 +112,9 @@
# VERIFY-THUNK-NEXT: {{ +}}DW_AT_low_pc (0x[[MERGED_FUN_ADDR]])
# VERIFY-THUNK-NEXT-NEXT-NEXT-NEXT-NEXT: {{ +}}DW_AT_name ("func_C")
+
+
+
;--- a.cpp
#define ATTR __attribute__((noinline)) extern "C"
typedef unsigned long long ULL;
@@ -89,32 +131,56 @@ ATTR ULL take_func_addr() {
return val;
}
+;--- b.cpp
+#define ATTR __attribute__((noinline)) extern "C"
+typedef unsigned long long ULL;
+
+// Identical functions in a different object file
+ATTR int func_D() { return 1; }
+ATTR int func_E() { return 1; }
+ATTR int func_F() { return 1; }
+
+ATTR ULL take_func_addr_b() {
+ ULL val = 0;
+ val += (ULL)(void*)func_D;
+ val += (ULL)(void*)func_E;
+ val += (ULL)(void*)func_F;
+ return val;
+}
+
;--- gen
-clang -target arm64-apple-macos11.0 -S -emit-llvm a.cpp -O3 -g -o -
+clang -target arm64-apple-macos11.0 -S -emit-llvm a.cpp -O3 -g -fdebug-compilation-dir=/proc/self/cwd -o -
+echo ""
+echo ";--- b.ll"
+clang -target arm64-apple-macos11.0 -S -emit-llvm b.cpp -O3 -g -fdebug-compilation-dir=/proc/self/cwd -o -
;--- a.ll
; ModuleID = 'a.cpp'
source_filename = "a.cpp"
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx11.0.0"
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_A() #0 !dbg !12 {
+entry:
ret i32 1, !dbg !16
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_B() #0 !dbg !17 {
+entry:
ret i32 1, !dbg !18
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i32 @func_C() #0 !dbg !19 {
+entry:
ret i32 1, !dbg !20
}
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
define noundef i64 @take_func_addr() local_unnamed_addr #0 !dbg !21 {
+entry:
#dbg_value(i64 0, !25, !DIExpression(), !26)
#dbg_value(i64 ptrtoint (ptr @func_A to i64), !25, !DIExpression(), !26)
#dbg_value(i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), !25, !DIExpression(), !26)
@@ -122,7 +188,7 @@ define noundef i64 @take_func_addr() local_unnamed_addr #0 !dbg !21 {
ret i64 add (i64 add (i64 ptrtoint (ptr @func_A to i64), i64 ptrtoint (ptr @func_B to i64)), i64 ptrtoint (ptr @func_C to i64)), !dbg !27
}
-attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
@@ -155,3 +221,71 @@ attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp wil
!25 = !DILocalVariable(name: "val", scope: !21, file: !1, line: 9, type: !3)
!26 = !DILocation(line: 0, scope: !21)
!27 = !DILocation(line: 13, column: 5, scope: !21)
+
+;--- b.ll
+; ModuleID = 'b.cpp'
+source_filename = "b.cpp"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx11.0.0"
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
+define noundef i32 @func_D() #0 !dbg !12 {
+entry:
+ ret i32 1, !dbg !16
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
+define noundef i32 @func_E() #0 !dbg !17 {
+entry:
+ ret i32 1, !dbg !18
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
+define noundef i32 @func_F() #0 !dbg !19 {
+entry:
+ ret i32 1, !dbg !20
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync)
+define noundef i64 @take_func_addr_b() local_unnamed_addr #0 !dbg !21 {
+entry:
+ #dbg_value(i64 0, !25, !DIExpression(), !26)
+ #dbg_value(i64 ptrtoint (ptr @func_D to i64), !25, !DIExpression(), !26)
+ #dbg_value(i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), !25, !DIExpression(), !26)
+ #dbg_value(i64 add (i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), i64 ptrtoint (ptr @func_F to i64)), !25, !DIExpression(), !26)
+ ret i64 add (i64 add (i64 ptrtoint (ptr @func_D to i64), i64 ptrtoint (ptr @func_E to i64)), i64 ptrtoint (ptr @func_F to i64)), !dbg !27
+}
+
+attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind ssp willreturn memory(none) uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7, !8, !9, !10, !11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!1 = !DIFile(filename: "b.cpp", directory: "/proc/self/cwd")
+!2 = !{!3, !5}
+!3 = !DIDerivedType(tag: DW_TAG_typedef, name: "ULL", file: !1, line: 2, baseType: !4)
+!4 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)
+!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!6 = !{i32 7, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i32 1, !"wchar_size", i32 4}
+!9 = !{i32 8, !"PIC Level", i32 2}
+!10 = !{i32 7, !"uwtable", i32 1}
+!11 = !{i32 7, !"frame-pointer", i32 1}
+!12 = distinct !DISubprogram(name: "func_D", scope: !1, file: !1, line: 5, type: !13, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!15}
+!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!16 = !DILocation(line: 5, column: 21, scope: !12)
+!17 = distinct !DISubprogram(name: "func_E", scope: !1, file: !1, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!18 = !DILocation(line: 6, column: 21, scope: !17)
+!19 = distinct !DISubprogram(name: "func_F", scope: !1, file: !1, line: 7, type: !13, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!20 = !DILocation(line: 7, column: 21, scope: !19)
+!21 = distinct !DISubprogram(name: "take_func_addr_b", scope: !1, file: !1, line: 9, type: !22, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !24)
+!22 = !DISubroutineType(types: !23)
+!23 = !{!3}
+!24 = !{!25}
+!25 = !DILocalVariable(name: "val", scope: !21, file: !1, line: 10, type: !3)
+!26 = !DILocation(line: 0, scope: !21)
+!27 = !DILocation(line: 14, column: 5, scope: !21)
More information about the llvm-commits
mailing list