[llvm] 2e2737c - [MC][MachO] Change addrsig format + ensure its size is properly set
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 19 18:22:46 PDT 2022
Author: Jez Ng
Date: 2022-07-19T21:22:23-04:00
New Revision: 2e2737cdf9983744fd894a5e5101429d74a67056
URL: https://github.com/llvm/llvm-project/commit/2e2737cdf9983744fd894a5e5101429d74a67056
DIFF: https://github.com/llvm/llvm-project/commit/2e2737cdf9983744fd894a5e5101429d74a67056.diff
LOG: [MC][MachO] Change addrsig format + ensure its size is properly set
There were two problems with the previous setup:
1. We weren't setting its size, which caused problems when `__llvm_addrsig`
wasn't the last section. In particular, `__debug_line` (if created) is
generated and placed after `__llvm_addrsig`, and would result in an
invalid object file w/ overlapping sections being emitted.
2. The symbol indices could be invalidated if e.g. `llvm-strip` ran on
the object file. See discussion [here][1].
To fix both these issues, we use symbol relocations instead of encoding
symbol indices directly in the section contents. The section itself
doesn't contain any data. That sidesteps the layout problem in addition
to solving the second issue.
The corresponding LLD change to read in this new format: {D128938}.
It will fix the icf-safe.ll test failure on this diff.
[1]: https://discourse.llvm.org/t/problems-with-mach-o-address-significance-table-generation/63392/
Reviewed By: #lld-macho, alx32
Differential Revision: https://reviews.llvm.org/D127637
Added:
Modified:
llvm/include/llvm/MC/MCMachObjectWriter.h
llvm/lib/MC/MCMachOStreamer.cpp
llvm/lib/MC/MachObjectWriter.cpp
llvm/test/CodeGen/AArch64/addrsig-macho.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h
index 149373dd2b541..15e4652bc05da 100644
--- a/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -263,9 +263,9 @@ class MachObjectWriter : public MCObjectWriter {
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
- uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ void populateAddrSigSection(MCAssembler &Asm);
- void writeAddrsigSection(MCAssembler &Asm);
+ uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
/// Construct a new Mach-O writer instance.
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index 9f22b9b0a866d..f358f593ff396 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -583,15 +583,27 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context,
return S;
}
-// Create the AddrSig section and first data fragment here as its layout needs
-// to be computed immediately after in order for it to be exported correctly.
+// The AddrSig section uses a series of relocations to refer to the symbols that
+// should be considered address-significant. The only interesting content of
+// these relocations is their symbol; the type, length etc will be ignored by
+// the linker. The reason we are not referring to the symbol indices directly is
+// that those indices will be invalidated by tools that update the symbol table.
+// Symbol relocations OTOH will have their indices updated by e.g. llvm-strip.
void MCMachOStreamer::createAddrSigSection() {
MCAssembler &Asm = getAssembler();
MCObjectWriter &writer = Asm.getWriter();
if (!writer.getEmitAddrsigSection())
return;
+ // Create the AddrSig section and first data fragment here as its layout needs
+ // to be computed immediately after in order for it to be exported correctly.
MCSection *AddrSigSection =
Asm.getContext().getObjectFileInfo()->getAddrSigSection();
Asm.registerSection(*AddrSigSection);
- new MCDataFragment(AddrSigSection);
+ auto *Frag = new MCDataFragment(AddrSigSection);
+ // We will generate a series of pointer-sized symbol relocations at offset
+ // 0x0. Set the section size to be large enough to contain a single pointer
+ // (instead of emitting a zero-sized section) so these relocations are
+ // technically valid, even though we don't expect these relocations to
+ // actually be applied by the linker.
+ Frag->getContents().resize(8);
}
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 78d0d9cec5562..038433cb24fac 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -753,32 +753,27 @@ static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
llvm_unreachable("Invalid mc version min type");
}
-// Encode addrsig data as symbol indexes in variable length encoding.
-void MachObjectWriter::writeAddrsigSection(MCAssembler &Asm) {
+void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) {
MCSection *AddrSigSection =
Asm.getContext().getObjectFileInfo()->getAddrSigSection();
- MCSection::FragmentListType &fragmentList = AddrSigSection->getFragmentList();
- if (!fragmentList.size())
- return;
-
- assert(fragmentList.size() == 1);
- MCFragment *pFragment = &*fragmentList.begin();
- MCDataFragment *pDataFragment = dyn_cast_or_null<MCDataFragment>(pFragment);
- assert(pDataFragment);
-
- raw_svector_ostream OS(pDataFragment->getContents());
- for (const MCSymbol *sym : this->getAddrsigSyms())
- encodeULEB128(sym->getIndex(), OS);
+ unsigned Log2Size = is64Bit() ? 3 : 2;
+ for (const MCSymbol *S : getAddrsigSyms()) {
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = 0;
+ MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28);
+ addRelocation(S, AddrSigSection, MRE);
+ }
}
uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
uint64_t StartOffset = W.OS.tell();
+ populateAddrSigSection(Asm);
+
// Compute symbol table information and bind symbol indices.
computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
UndefinedSymbolData);
- writeAddrsigSection(Asm);
if (!Asm.CGProfile.empty()) {
MCSection *CGProfileSection = Asm.getContext().getMachOSection(
diff --git a/llvm/test/CodeGen/AArch64/addrsig-macho.ll b/llvm/test/CodeGen/AArch64/addrsig-macho.ll
index f16ccf117558b..360876fccaad3 100644
--- a/llvm/test/CodeGen/AArch64/addrsig-macho.ll
+++ b/llvm/test/CodeGen/AArch64/addrsig-macho.ll
@@ -1,6 +1,7 @@
; RUN: llc -filetype=asm %s -o - -mtriple arm64-apple-ios -addrsig | FileCheck %s
; RUN: llc -filetype=obj %s -o %t -mtriple arm64-apple-ios -addrsig
-; RUN: llvm-readobj --hex-dump='__llvm_addrsig' %t | FileCheck %s --check-prefix=OBJ
+; RUN: llvm-objdump --macho --section-headers %t | FileCheck %s --check-prefix=SECTIONS
+; RUN: llvm-objdump --macho --reloc %t | FileCheck %s --check-prefix=RELOCS
; CHECK: .addrsig{{$}}
; CHECK-NEXT: .addrsig_sym _func03_takeaddr
@@ -11,8 +12,23 @@
; CHECK-NEXT: .addrsig_sym _a1
; CHECK-NEXT: .addrsig_sym _i1
-; OBJ: Hex dump of section '__llvm_addrsig':
-; OBJ-NEXT:0x{{[0-9a-f]*}}
+; The __debug_line section (which should be generated for the given input file)
+; should appear immediately after the addrsig section. Use it to make sure
+; addrsig's section size has been properly set during section layout. This
+; catches a regression where the next section would overlap addrsig's
+; contents.
+; SECTIONS: __llvm_addrsig 00000008 [[#%.16x,ADDR:]]
+; SECTIONS-NEXT: __debug_line {{[[:xdigit:]]+}} [[#%.16x,8+ADDR]]
+
+; RELOCS: Relocation information (__DATA,__llvm_addrsig) 7 entries
+; RELOCS: address pcrel length extern type scattered symbolnum/value
+; RELOCS: 00000000 False ?( 3) True UNSIGND False _i1
+; RELOCS: 00000000 False ?( 3) True UNSIGND False _a1
+; RELOCS: 00000000 False ?( 3) True UNSIGND False _g1
+; RELOCS: 00000000 False ?( 3) True UNSIGND False _result
+; RELOCS: 00000000 False ?( 3) True UNSIGND False _metadata_f2
+; RELOCS: 00000000 False ?( 3) True UNSIGND False _f1
+; RELOCS: 00000000 False ?( 3) True UNSIGND False _func03_takeaddr
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios7.0.0"
@@ -38,7 +54,7 @@ entry:
}
; Function Attrs: minsize nofree noinline norecurse nounwind optsize ssp uwtable
-define void @func03_takeaddr() #0 {
+define void @func03_takeaddr() #0 !dbg !9 {
entry:
%0 = load volatile i32, ptr @result, align 4
%add = add nsw i32 %0, 1
@@ -72,8 +88,8 @@ define void()* @f1() {
%a2 = bitcast i32* @a2 to i8*
%i1 = bitcast void()* @i1 to i8*
%i2 = bitcast void()* @i2 to i8*
- call void @llvm.dbg.value(metadata i8* bitcast (void()* @metadata_f1 to i8*), metadata !5, metadata !DIExpression()), !dbg !7
- call void @llvm.dbg.value(metadata i8* bitcast (void()* @metadata_f2 to i8*), metadata !5, metadata !DIExpression()), !dbg !7
+ call void @llvm.dbg.value(metadata i8* bitcast (void()* @metadata_f1 to i8*), metadata !6, metadata !DIExpression()), !dbg !8
+ call void @llvm.dbg.value(metadata i8* bitcast (void()* @metadata_f2 to i8*), metadata !6, metadata !DIExpression()), !dbg !8
call void @f4(i8* bitcast (void()* @metadata_f2 to i8*))
unreachable
}
@@ -108,9 +124,18 @@ declare void @f3() unnamed_addr
declare void @llvm.dbg.value(metadata, metadata, metadata)
attributes #0 = { noinline }
-
-!3 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false)
-!4 = !DILocation(line: 0, scope: !3)
-!5 = !DILocalVariable(scope: !6)
-!6 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false)
-!7 = !DILocation(line: 0, scope: !6, inlinedAt: !4)
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug)
+!1 = !DIFile(filename: "test.c", directory: "/tmp")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+
+!4 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false, unit: !0)
+!5 = !DILocation(line: 0, scope: !4)
+!6 = !DILocalVariable(scope: !7)
+!7 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false, unit: !0)
+!8 = !DILocation(line: 0, scope: !7, inlinedAt: !5)
+!9 = distinct !DISubprogram(scope: null, file: !1, line: 1, type: !10, unit: !0)
+!10 = !DISubroutineType(types: !{})
More information about the llvm-commits
mailing list