[llvm] [BOLT] Ignore AArch64 markers outside their sections. (PR #74106)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 01:05:24 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-binary-utilities
Author: Jacob Bramley (jacobbramley)
<details>
<summary>Changes</summary>
AArch64 uses $d and $x symbols to delimit data embedded in code. However, sometimes we see $d symbols, typically in .eh_frame, with addresses that belong to different sections. These occasionally fall inside .text functions and cause BOLT to stop disassembling, which in turn causes DWARF CFA processing to fail.
As a workaround, we just ignore symbols with addresses outside the section they belong to. This behaviour is consistent with objdump and similar tools.
---
Full diff: https://github.com/llvm/llvm-project/pull/74106.diff
4 Files Affected:
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+40-2)
- (added) bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml (+56)
- (added) bolt/test/AArch64/spurious-marker-symbol.test (+61)
- (modified) llvm/include/llvm/Object/ObjectFile.h (+1-1)
``````````diff
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index adacb50dc167c8..a5ad24fc92ccee 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -791,9 +791,47 @@ void RewriteInstance::discoverFileObjects() {
BinarySection Section(*BC, *cantFail(Sym.getSection()));
return Section.isAllocatable();
};
+ auto checkSymbolInSection = [this](const SymbolInfo &S) {
+ // Sometimes, we encounter symbols with addresses outside their section. If
+ // such symbols happen to fall into another section, they can interfere with
+ // disassembly. Notably, this occurs with AArch64 marker symbols ($d and $t)
+ // that belong to .eh_frame, but end up pointing into .text.
+ // As a workaround, we ignore all symbols that lie outside their sections.
+ auto Section = cantFail(S.Symbol.getSection());
+
+ // Accept all absolute symbols.
+ if (Section == InputFile->section_end())
+ return true;
+
+ uint64_t SecStart = Section->getAddress();
+ uint64_t SecEnd = SecStart + Section->getSize();
+ uint64_t SymEnd = S.Address + ELFSymbolRef(S.Symbol).getSize();
+ if (S.Address >= SecStart && SymEnd <= SecEnd)
+ return true;
+
+ auto SymType = cantFail(S.Symbol.getType());
+ // Skip warnings for common benign cases.
+ if (opts::Verbosity < 1 && SymType == SymbolRef::ST_Other)
+ return false; // E.g. ELF::STT_TLS.
+
+ auto SymName = S.Symbol.getName();
+ auto SecName = cantFail(S.Symbol.getSection())->getName();
+ BC->errs() << "BOLT-WARNING: ignoring symbol "
+ << (SymName ? *SymName : "[unnamed]")
+ << " at 0x"
+ << Twine::utohexstr(S.Address)
+ << ", which lies outside "
+ << (SecName ? *SecName : "[unnamed]")
+ << "\n";
+
+ return false;
+ };
for (const SymbolRef &Symbol : InputFile->symbols())
- if (isSymbolInMemory(Symbol))
- SortedSymbols.push_back({cantFail(Symbol.getAddress()), Symbol});
+ if (isSymbolInMemory(Symbol)) {
+ SymbolInfo SymInfo{cantFail(Symbol.getAddress()), Symbol};
+ if (checkSymbolInSection(SymInfo))
+ SortedSymbols.push_back(SymInfo);
+ }
auto CompareSymbols = [this](const SymbolInfo &A, const SymbolInfo &B) {
if (A.Address != B.Address)
diff --git a/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml b/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml
new file mode 100644
index 00000000000000..446397b911d9e0
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml
@@ -0,0 +1,56 @@
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_AARCH64
+ Entry: 0x2a0000
+ProgramHeaders:
+ - Type: PT_PHDR
+ Flags: [ PF_R ]
+ VAddr: 0x40
+ Align: 0x8
+ FileSize: 0xa8
+ MemSize: 0xa8
+ Offset: 0x40
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ VAddr: 0x0
+ Align: 0x10000
+ FileSize: 0xf8
+ MemSize: 0xf8
+ Offset: 0x0
+ - Type: PT_LOAD
+ Flags: [ PF_X, PF_R ]
+ VAddr: 0x2a0000
+ Align: 0x10000
+ FirstSec: .text
+ LastSec: .ignored
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x2a0000
+ AddressAlign: 0x4
+ Content: 400580d2c0035fd6
+ - Name: .ignored
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x2a0008
+ AddressAlign: 0x8
+ Size: 0x8
+ - Name: .eh_frame
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x2a0010
+ AddressAlign: 0x8
+ Content: 1000000000000000017a520004781e010b0c1f00140000001800000000002a0008000000000e01410e010000
+Symbols:
+ - Name: func
+ Section: .text
+ Value: 0x2a0000
+ Size: 0x8
+ - Name: '$d.42'
+ Section: .ignored
+ Value: 0x2a0004
+...
diff --git a/bolt/test/AArch64/spurious-marker-symbol.test b/bolt/test/AArch64/spurious-marker-symbol.test
new file mode 100644
index 00000000000000..901db08774a744
--- /dev/null
+++ b/bolt/test/AArch64/spurious-marker-symbol.test
@@ -0,0 +1,61 @@
+// Check that marker symbols ($d, $x) denoting data embedded in code are ignored
+// if they fall outside their respective sections.
+
+// RUN: yaml2obj %S/Inputs/spurious-marker-symbol.yaml -o %t.exe
+// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+// CHECK: 1 out of 1 functions were overwritten
+// RUN: llvm-objdump -j .text -d %t.bolt | FileCheck %s -check-prefix=CHECK-DISASM
+// CHECK-DISASM: func
+// CHECK-DISASM: 2a0000: d2800540 mov
+// CHECK-DISASM: 2a0004: d65f03c0 ret
+
+// The YAML encodes the following assembly and debug information:
+
+ .text
+ .globl func
+ .type func, %function
+func:
+ mov x0, #42
+// $d.42: (symbol in .ignored, with an address in .text)
+ ret
+
+// .eh_frame contains minimal DWARF with a CFA operation on the `ret`. BOLT
+// should ignore the spurious `$d.42`. If it doesn't, then it will stop
+// disassembling after the `mov` and will fail to process the second
+// DW_CFA_def_cfa_offset.
+//
+// CIE
+// length: 00000010
+// CIE_id: 00000000
+// version: 01
+// augmentation:
+// "zR" 7a 52 00
+// - read augmentation data
+// - read FDE pointer encoding
+// code_alignment_factor: 04
+// data_alignment_factor: 78 (-8)
+// return_address_register: 1e (r30 / lr)
+//
+// augmentation data:
+// length: 01
+// FDE pointers are absptr+sdata4 0b
+//
+// initial_instructions:
+// DW_CFA_def_cfa (31, 0): 0c 1f 00
+//
+// Encoding: 10000000'00000000'01'7a5200'04'78'1e'10'0b'0c1f00
+//
+// FDE
+// length: 00000014
+// CIE_pointer: 00000018 (backwards offset from here to CIE)
+// initial_location: 002a0000 (`func` as absptr+sdata4)
+// address_range: 00000008
+// augmentation data:
+// length: 00
+// instructions:
+// DW_CFA_def_cfa_offset (1) 0e 01
+// DW_CFA_advance_loc (1) 41 (`ret` at 0x2a0004)
+// DW_CFA_def_cfa_offset (1) 0e 01 Fails unless $d.42 is ignored.
+// DW_CFA_nop 00 00
+//
+// Encoding: 14000000'18000000'00002a00'08000000'000e0141'0e010000
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index f49763e31a9c76..20c0ef5ccfcea2 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -199,7 +199,7 @@ class SymbolRef : public BasicSymbolRef {
Expected<SymbolRef::Type> getType() const;
/// Get section this symbol is defined in reference to. Result is
- /// end_sections() if it is undefined or is an absolute symbol.
+ /// section_end() if it is undefined or is an absolute symbol.
Expected<section_iterator> getSection() const;
const ObjectFile *getObject() const;
``````````
</details>
https://github.com/llvm/llvm-project/pull/74106
More information about the llvm-commits
mailing list