[llvm] 2e2737c - [MC][MachO] Change addrsig format + ensure its size is properly set

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 19 18:22:46 PDT 2022


Author: Jez Ng
Date: 2022-07-19T21:22:23-04:00
New Revision: 2e2737cdf9983744fd894a5e5101429d74a67056

URL: https://github.com/llvm/llvm-project/commit/2e2737cdf9983744fd894a5e5101429d74a67056
DIFF: https://github.com/llvm/llvm-project/commit/2e2737cdf9983744fd894a5e5101429d74a67056.diff

LOG: [MC][MachO] Change addrsig format + ensure its size is properly set

There were two problems with the previous setup:

1. We weren't setting its size, which caused problems when `__llvm_addrsig`
   wasn't the last section. In particular, `__debug_line` (if created) is
   generated and placed after `__llvm_addrsig`, and would result in an
   invalid object file w/ overlapping sections being emitted.

2. The symbol indices could be invalidated if e.g. `llvm-strip` ran on
   the object file. See discussion [here][1].

To fix both these issues, we use symbol relocations instead of encoding
symbol indices directly in the section contents. The section itself
doesn't contain any data. That sidesteps the layout problem in addition
to solving the second issue.

The corresponding LLD change to read in this new format: {D128938}.
It will fix the icf-safe.ll test failure on this diff.

[1]: https://discourse.llvm.org/t/problems-with-mach-o-address-significance-table-generation/63392/

Reviewed By: #lld-macho, alx32

Differential Revision: https://reviews.llvm.org/D127637

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCMachObjectWriter.h
    llvm/lib/MC/MCMachOStreamer.cpp
    llvm/lib/MC/MachObjectWriter.cpp
    llvm/test/CodeGen/AArch64/addrsig-macho.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h
index 149373dd2b541..15e4652bc05da 100644
--- a/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -263,9 +263,9 @@ class MachObjectWriter : public MCObjectWriter {
                                               const MCFragment &FB, bool InSet,
                                               bool IsPCRel) const override;
 
-  uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+  void populateAddrSigSection(MCAssembler &Asm);
 
-  void writeAddrsigSection(MCAssembler &Asm);
+  uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
 };
 
 /// Construct a new Mach-O writer instance.

diff  --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index 9f22b9b0a866d..f358f593ff396 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -583,15 +583,27 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context,
   return S;
 }
 
-// Create the AddrSig section and first data fragment here as its layout needs
-// to be computed immediately after in order for it to be exported correctly.
+// The AddrSig section uses a series of relocations to refer to the symbols that
+// should be considered address-significant. The only interesting content of
+// these relocations is their symbol; the type, length etc will be ignored by
+// the linker. The reason we are not referring to the symbol indices directly is
+// that those indices will be invalidated by tools that update the symbol table.
+// Symbol relocations OTOH will have their indices updated by e.g. llvm-strip.
 void MCMachOStreamer::createAddrSigSection() {
   MCAssembler &Asm = getAssembler();
   MCObjectWriter &writer = Asm.getWriter();
   if (!writer.getEmitAddrsigSection())
     return;
+  // Create the AddrSig section and first data fragment here as its layout needs
+  // to be computed immediately after in order for it to be exported correctly.
   MCSection *AddrSigSection =
       Asm.getContext().getObjectFileInfo()->getAddrSigSection();
   Asm.registerSection(*AddrSigSection);
-  new MCDataFragment(AddrSigSection);
+  auto *Frag = new MCDataFragment(AddrSigSection);
+  // We will generate a series of pointer-sized symbol relocations at offset
+  // 0x0. Set the section size to be large enough to contain a single pointer
+  // (instead of emitting a zero-sized section) so these relocations are
+  // technically valid, even though we don't expect these relocations to
+  // actually be applied by the linker.
+  Frag->getContents().resize(8);
 }

diff  --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 78d0d9cec5562..038433cb24fac 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -753,32 +753,27 @@ static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
   llvm_unreachable("Invalid mc version min type");
 }
 
-// Encode addrsig data as symbol indexes in variable length encoding.
-void MachObjectWriter::writeAddrsigSection(MCAssembler &Asm) {
+void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) {
   MCSection *AddrSigSection =
       Asm.getContext().getObjectFileInfo()->getAddrSigSection();
-  MCSection::FragmentListType &fragmentList = AddrSigSection->getFragmentList();
-  if (!fragmentList.size())
-    return;
-
-  assert(fragmentList.size() == 1);
-  MCFragment *pFragment = &*fragmentList.begin();
-  MCDataFragment *pDataFragment = dyn_cast_or_null<MCDataFragment>(pFragment);
-  assert(pDataFragment);
-
-  raw_svector_ostream OS(pDataFragment->getContents());
-  for (const MCSymbol *sym : this->getAddrsigSyms())
-    encodeULEB128(sym->getIndex(), OS);
+  unsigned Log2Size = is64Bit() ? 3 : 2;
+  for (const MCSymbol *S : getAddrsigSyms()) {
+    MachO::any_relocation_info MRE;
+    MRE.r_word0 = 0;
+    MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28);
+    addRelocation(S, AddrSigSection, MRE);
+  }
 }
 
 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
                                        const MCAsmLayout &Layout) {
   uint64_t StartOffset = W.OS.tell();
 
+  populateAddrSigSection(Asm);
+
   // Compute symbol table information and bind symbol indices.
   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
                      UndefinedSymbolData);
-  writeAddrsigSection(Asm);
 
   if (!Asm.CGProfile.empty()) {
     MCSection *CGProfileSection = Asm.getContext().getMachOSection(

diff  --git a/llvm/test/CodeGen/AArch64/addrsig-macho.ll b/llvm/test/CodeGen/AArch64/addrsig-macho.ll
index f16ccf117558b..360876fccaad3 100644
--- a/llvm/test/CodeGen/AArch64/addrsig-macho.ll
+++ b/llvm/test/CodeGen/AArch64/addrsig-macho.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -filetype=asm %s -o - -mtriple arm64-apple-ios -addrsig | FileCheck %s
 ; RUN: llc -filetype=obj %s -o %t -mtriple arm64-apple-ios -addrsig
-; RUN: llvm-readobj --hex-dump='__llvm_addrsig' %t | FileCheck %s --check-prefix=OBJ
+; RUN: llvm-objdump --macho --section-headers %t | FileCheck %s --check-prefix=SECTIONS
+; RUN: llvm-objdump --macho --reloc %t | FileCheck %s --check-prefix=RELOCS
 
 ; CHECK:			.addrsig{{$}}
 ; CHECK-NEXT:	.addrsig_sym _func03_takeaddr
@@ -11,8 +12,23 @@
 ; CHECK-NEXT:	.addrsig_sym _a1
 ; CHECK-NEXT:	.addrsig_sym _i1
 
-; OBJ: Hex dump of section '__llvm_addrsig':
-; OBJ-NEXT:0x{{[0-9a-f]*}}
+; The __debug_line section (which should be generated for the given input file)
+; should appear immediately after the addrsig section.  Use it to make sure
+; addrsig's section size has been properly set during section layout. This
+; catches a regression where the next section would overlap addrsig's
+; contents.
+; SECTIONS:      __llvm_addrsig 00000008          [[#%.16x,ADDR:]]
+; SECTIONS-NEXT: __debug_line   {{[[:xdigit:]]+}} [[#%.16x,8+ADDR]]
+
+; RELOCS: Relocation information (__DATA,__llvm_addrsig) 7 entries
+; RELOCS: address  pcrel length extern type    scattered symbolnum/value
+; RELOCS: 00000000 False ?( 3)  True   UNSIGND False     _i1
+; RELOCS: 00000000 False ?( 3)  True   UNSIGND False     _a1
+; RELOCS: 00000000 False ?( 3)  True   UNSIGND False     _g1
+; RELOCS: 00000000 False ?( 3)  True   UNSIGND False     _result
+; RELOCS: 00000000 False ?( 3)  True   UNSIGND False     _metadata_f2
+; RELOCS: 00000000 False ?( 3)  True   UNSIGND False     _f1
+; RELOCS: 00000000 False ?( 3)  True   UNSIGND False     _func03_takeaddr
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios7.0.0"
@@ -38,7 +54,7 @@ entry:
 }
 
 ; Function Attrs: minsize nofree noinline norecurse nounwind optsize ssp uwtable
-define void @func03_takeaddr() #0 {
+define void @func03_takeaddr() #0 !dbg !9 {
 entry:
   %0 = load volatile i32, ptr @result, align 4
   %add = add nsw i32 %0, 1
@@ -72,8 +88,8 @@ define void()* @f1() {
   %a2 = bitcast i32* @a2 to i8*
   %i1 = bitcast void()* @i1 to i8*
   %i2 = bitcast void()* @i2 to i8*
-  call void @llvm.dbg.value(metadata i8* bitcast (void()* @metadata_f1 to i8*), metadata !5, metadata !DIExpression()), !dbg !7
-  call void @llvm.dbg.value(metadata i8* bitcast (void()* @metadata_f2 to i8*), metadata !5, metadata !DIExpression()), !dbg !7
+  call void @llvm.dbg.value(metadata i8* bitcast (void()* @metadata_f1 to i8*), metadata !6, metadata !DIExpression()), !dbg !8
+  call void @llvm.dbg.value(metadata i8* bitcast (void()* @metadata_f2 to i8*), metadata !6, metadata !DIExpression()), !dbg !8
   call void @f4(i8* bitcast (void()* @metadata_f2 to i8*))
   unreachable
 }
@@ -108,9 +124,18 @@ declare void @f3() unnamed_addr
 declare void @llvm.dbg.value(metadata, metadata, metadata)
 
 attributes #0 = { noinline }
-
-!3 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false)
-!4 = !DILocation(line: 0, scope: !3)
-!5 = !DILocalVariable(scope: !6)
-!6 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false)
-!7 = !DILocation(line: 0, scope: !6, inlinedAt: !4)
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug)
+!1 = !DIFile(filename: "test.c", directory: "/tmp")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+
+!4 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false, unit: !0)
+!5 = !DILocation(line: 0, scope: !4)
+!6 = !DILocalVariable(scope: !7)
+!7 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false, unit: !0)
+!8 = !DILocation(line: 0, scope: !7, inlinedAt: !5)
+!9 = distinct !DISubprogram(scope: null, file: !1, line: 1, type: !10, unit: !0)
+!10 = !DISubroutineType(types: !{})


        


More information about the llvm-commits mailing list