[llvm] [BOLT] Ignore AArch64 markers outside their sections. (PR #74106)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 26 01:05:24 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-binary-utilities

Author: Jacob Bramley (jacobbramley)

<details>
<summary>Changes</summary>

AArch64 uses $d and $x symbols to delimit data embedded in code. However, sometimes we see $d symbols, typically in .eh_frame, with addresses that belong to different sections. These occasionally fall inside .text functions and cause BOLT to stop disassembling, which in turn causes DWARF CFA processing to fail.

As a workaround, we just ignore symbols with addresses outside the section they belong to. This behaviour is consistent with objdump and similar tools.

---
Full diff: https://github.com/llvm/llvm-project/pull/74106.diff


4 Files Affected:

- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+40-2) 
- (added) bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml (+56) 
- (added) bolt/test/AArch64/spurious-marker-symbol.test (+61) 
- (modified) llvm/include/llvm/Object/ObjectFile.h (+1-1) 


``````````diff
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index adacb50dc167c8..a5ad24fc92ccee 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -791,9 +791,47 @@ void RewriteInstance::discoverFileObjects() {
     BinarySection Section(*BC, *cantFail(Sym.getSection()));
     return Section.isAllocatable();
   };
+  auto checkSymbolInSection = [this](const SymbolInfo &S) {
+    // Sometimes, we encounter symbols with addresses outside their section. If
+    // such symbols happen to fall into another section, they can interfere with
+    // disassembly. Notably, this occurs with AArch64 marker symbols ($d and $t)
+    // that belong to .eh_frame, but end up pointing into .text.
+    // As a workaround, we ignore all symbols that lie outside their sections.
+    auto Section = cantFail(S.Symbol.getSection());
+
+    // Accept all absolute symbols.
+    if (Section == InputFile->section_end())
+      return true;
+
+    uint64_t SecStart = Section->getAddress();
+    uint64_t SecEnd = SecStart + Section->getSize();
+    uint64_t SymEnd = S.Address + ELFSymbolRef(S.Symbol).getSize();
+    if (S.Address >= SecStart && SymEnd <= SecEnd)
+      return true;
+
+    auto SymType = cantFail(S.Symbol.getType());
+    // Skip warnings for common benign cases.
+    if (opts::Verbosity < 1 && SymType == SymbolRef::ST_Other)
+      return false;  // E.g. ELF::STT_TLS.
+
+    auto SymName = S.Symbol.getName();
+    auto SecName = cantFail(S.Symbol.getSection())->getName();
+    BC->errs() << "BOLT-WARNING: ignoring symbol "
+      << (SymName ? *SymName : "[unnamed]")
+      << " at 0x"
+      << Twine::utohexstr(S.Address)
+      << ", which lies outside "
+      << (SecName ? *SecName : "[unnamed]")
+      << "\n";
+
+    return false;
+  };
   for (const SymbolRef &Symbol : InputFile->symbols())
-    if (isSymbolInMemory(Symbol))
-      SortedSymbols.push_back({cantFail(Symbol.getAddress()), Symbol});
+    if (isSymbolInMemory(Symbol)) {
+      SymbolInfo SymInfo{cantFail(Symbol.getAddress()), Symbol};
+      if (checkSymbolInSection(SymInfo))
+        SortedSymbols.push_back(SymInfo);
+    }
 
   auto CompareSymbols = [this](const SymbolInfo &A, const SymbolInfo &B) {
     if (A.Address != B.Address)
diff --git a/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml b/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml
new file mode 100644
index 00000000000000..446397b911d9e0
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml
@@ -0,0 +1,56 @@
+--- !ELF
+FileHeader:
+  Class:            ELFCLASS64
+  Data:             ELFDATA2LSB
+  Type:             ET_EXEC
+  Machine:          EM_AARCH64
+  Entry:            0x2a0000
+ProgramHeaders:
+  - Type:           PT_PHDR
+    Flags:          [ PF_R ]
+    VAddr:          0x40
+    Align:          0x8
+    FileSize:       0xa8
+    MemSize:        0xa8
+    Offset:         0x40
+  - Type:           PT_LOAD
+    Flags:          [ PF_R ]
+    VAddr:          0x0
+    Align:          0x10000
+    FileSize:       0xf8
+    MemSize:        0xf8
+    Offset:         0x0
+  - Type:           PT_LOAD
+    Flags:          [ PF_X, PF_R ]
+    VAddr:          0x2a0000
+    Align:          0x10000
+    FirstSec:       .text
+    LastSec:        .ignored
+Sections:
+  - Name:           .text
+    Type:           SHT_PROGBITS
+    Flags:          [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:        0x2a0000
+    AddressAlign:   0x4
+    Content:        400580d2c0035fd6
+  - Name:           .ignored
+    Type:           SHT_PROGBITS
+    Flags:          [ SHF_ALLOC ]
+    Address:        0x2a0008
+    AddressAlign:   0x8
+    Size:           0x8
+  - Name:           .eh_frame
+    Type:           SHT_PROGBITS
+    Flags:          [ SHF_ALLOC ]
+    Address:        0x2a0010
+    AddressAlign:   0x8
+    Content:        1000000000000000017a520004781e010b0c1f00140000001800000000002a0008000000000e01410e010000
+Symbols:
+  - Name:           func
+    Section:        .text
+    Value:          0x2a0000
+    Size:           0x8
+  - Name:           '$d.42'
+    Section:        .ignored
+    Value:          0x2a0004
+...
diff --git a/bolt/test/AArch64/spurious-marker-symbol.test b/bolt/test/AArch64/spurious-marker-symbol.test
new file mode 100644
index 00000000000000..901db08774a744
--- /dev/null
+++ b/bolt/test/AArch64/spurious-marker-symbol.test
@@ -0,0 +1,61 @@
+// Check that marker symbols ($d, $x) denoting data embedded in code are ignored
+// if they fall outside their respective sections.
+
+// RUN: yaml2obj %S/Inputs/spurious-marker-symbol.yaml -o %t.exe
+// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+// CHECK: 1 out of 1 functions were overwritten
+// RUN: llvm-objdump -j .text -d %t.bolt | FileCheck %s -check-prefix=CHECK-DISASM
+// CHECK-DISASM: func
+// CHECK-DISASM: 2a0000: d2800540   mov
+// CHECK-DISASM: 2a0004: d65f03c0   ret
+
+// The YAML encodes the following assembly and debug information:
+
+  .text
+  .globl func
+  .type func, %function
+func:
+  mov    x0, #42
+// $d.42:    (symbol in .ignored, with an address in .text)
+  ret
+
+// .eh_frame contains minimal DWARF with a CFA operation on the `ret`. BOLT
+// should ignore the spurious `$d.42`. If it doesn't, then it will stop
+// disassembling after the `mov` and will fail to process the second
+// DW_CFA_def_cfa_offset.
+//
+// CIE
+//    length:                       00000010
+//    CIE_id:                       00000000
+//    version:                            01
+//    augmentation:
+//      "zR"                        7a 52 00
+//      - read augmentation data
+//      - read FDE pointer encoding
+//    code_alignment_factor:              04
+//    data_alignment_factor:              78  (-8)
+//    return_address_register:            1e  (r30 / lr)
+//
+//    augmentation data:
+//      length:                           01
+//      FDE pointers are absptr+sdata4    0b
+//
+//    initial_instructions:
+//      DW_CFA_def_cfa (31, 0):     0c 1f 00
+//
+// Encoding: 10000000'00000000'01'7a5200'04'78'1e'10'0b'0c1f00
+//
+// FDE
+//    length:                       00000014
+//    CIE_pointer:                  00000018  (backwards offset from here to CIE)
+//    initial_location:             002a0000  (`func` as absptr+sdata4)
+//    address_range:                00000008
+//    augmentation data:
+//      length:                           00
+//    instructions:
+//      DW_CFA_def_cfa_offset (1)      0e 01
+//      DW_CFA_advance_loc (1)            41  (`ret` at 0x2a0004)
+//      DW_CFA_def_cfa_offset (1)      0e 01  Fails unless $d.42 is ignored.
+//      DW_CFA_nop                     00 00
+//
+// Encoding: 14000000'18000000'00002a00'08000000'000e0141'0e010000
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index f49763e31a9c76..20c0ef5ccfcea2 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -199,7 +199,7 @@ class SymbolRef : public BasicSymbolRef {
   Expected<SymbolRef::Type> getType() const;
 
   /// Get section this symbol is defined in reference to. Result is
-  /// end_sections() if it is undefined or is an absolute symbol.
+  /// section_end() if it is undefined or is an absolute symbol.
   Expected<section_iterator> getSection() const;
 
   const ObjectFile *getObject() const;

``````````

</details>


https://github.com/llvm/llvm-project/pull/74106


More information about the llvm-commits mailing list