[llvm] [BOLT] Ignore AArch64 markers outside their sections. (PR #74106)

Jacob Bramley via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 1 09:00:31 PST 2023


https://github.com/jacobbramley created https://github.com/llvm/llvm-project/pull/74106

AArch64 uses $d and $x symbols to delimit data embedded in code. However, sometimes we see $d symbols, typically in .eh_frame, with addresses that belong to different sections. These occasionally fall inside .text functions and cause BOLT to stop disassembling, which in turn causes DWARF CFA processing to fail.

As a workaround, we just ignore symbols with addresses outside the section they belong to. This behaviour is consistent with objdump and similar tools.

From 7f43b6bb0ad004a0f084e90141cc81d79e6643b3 Mon Sep 17 00:00:00 2001
From: Jacob Bramley <jacob.bramley at arm.com>
Date: Tue, 21 Nov 2023 12:27:18 +0000
Subject: [PATCH] [BOLT] Ignore AArch64 markers outside their sections.

AArch64 uses $d and $x symbols to delimit data embedded in code.
However, sometimes we see $d symbols, typically in .eh_frame, with
addresses that belong to different sections. These occasionally fall
inside .text functions and cause BOLT to stop disassembling, which in
turn causes DWARF CFA processing to fail.

As a workaround, we just ignore symbols with addresses outside the
section they belong to. This behaviour is consistent with objdump and
similar tools.
---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 16 ++++-
 .../Inputs/spurious-marker-symbol.yaml        | 56 +++++++++++++++++
 bolt/test/AArch64/spurious-marker-symbol.test | 61 +++++++++++++++++++
 3 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml
 create mode 100644 bolt/test/AArch64/spurious-marker-symbol.test

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 81c9cbff726bb9a..135d8ab4fa6de93 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -865,6 +865,20 @@ void RewriteInstance::discoverFileObjects() {
 
   std::vector<MarkerSym> SortedMarkerSymbols;
   auto addExtraDataMarkerPerSymbol = [&]() {
+    // Some ELFs have marker symbols with addresses outside their section.
+    // This occurs, for example, with some `.eh_frame` symbols, and sometimes
+    // (if rarely) they interfere with the disassembly of `.text` functions. As
+    // a workaround, we ignore all symbols that lie outside their sections.
+    auto considerSymbol = [](const SymbolInfo &S) {
+      auto SectionOrError = S.Symbol.getSection();
+      if (SectionOrError) {
+        uint64_t SecStart = (*SectionOrError)->getAddress();
+        uint64_t SecEnd = SecStart + (*SectionOrError)->getSize();
+        return (S.Address >= SecStart && S.Address < SecEnd);
+      }
+      return true;
+    };
+
     bool IsData = false;
     uint64_t LastAddr = 0;
     for (const auto &SymInfo : SortedSymbols) {
@@ -872,7 +886,7 @@ void RewriteInstance::discoverFileObjects() {
         continue;
 
       MarkerSymType MarkerType = BC->getMarkerType(SymInfo.Symbol);
-      if (MarkerType != MarkerSymType::NONE) {
+      if (considerSymbol(SymInfo) && MarkerType != MarkerSymType::NONE) {
         SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
         LastAddr = SymInfo.Address;
         IsData = MarkerType == MarkerSymType::DATA;
diff --git a/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml b/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml
new file mode 100644
index 000000000000000..446397b911d9e00
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/spurious-marker-symbol.yaml
@@ -0,0 +1,56 @@
+--- !ELF
+FileHeader:
+  Class:            ELFCLASS64
+  Data:             ELFDATA2LSB
+  Type:             ET_EXEC
+  Machine:          EM_AARCH64
+  Entry:            0x2a0000
+ProgramHeaders:
+  - Type:           PT_PHDR
+    Flags:          [ PF_R ]
+    VAddr:          0x40
+    Align:          0x8
+    FileSize:       0xa8
+    MemSize:        0xa8
+    Offset:         0x40
+  - Type:           PT_LOAD
+    Flags:          [ PF_R ]
+    VAddr:          0x0
+    Align:          0x10000
+    FileSize:       0xf8
+    MemSize:        0xf8
+    Offset:         0x0
+  - Type:           PT_LOAD
+    Flags:          [ PF_X, PF_R ]
+    VAddr:          0x2a0000
+    Align:          0x10000
+    FirstSec:       .text
+    LastSec:        .ignored
+Sections:
+  - Name:           .text
+    Type:           SHT_PROGBITS
+    Flags:          [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:        0x2a0000
+    AddressAlign:   0x4
+    Content:        400580d2c0035fd6
+  - Name:           .ignored
+    Type:           SHT_PROGBITS
+    Flags:          [ SHF_ALLOC ]
+    Address:        0x2a0008
+    AddressAlign:   0x8
+    Size:           0x8
+  - Name:           .eh_frame
+    Type:           SHT_PROGBITS
+    Flags:          [ SHF_ALLOC ]
+    Address:        0x2a0010
+    AddressAlign:   0x8
+    Content:        1000000000000000017a520004781e010b0c1f00140000001800000000002a0008000000000e01410e010000
+Symbols:
+  - Name:           func
+    Section:        .text
+    Value:          0x2a0000
+    Size:           0x8
+  - Name:           '$d.42'
+    Section:        .ignored
+    Value:          0x2a0004
+...
diff --git a/bolt/test/AArch64/spurious-marker-symbol.test b/bolt/test/AArch64/spurious-marker-symbol.test
new file mode 100644
index 000000000000000..901db08774a7442
--- /dev/null
+++ b/bolt/test/AArch64/spurious-marker-symbol.test
@@ -0,0 +1,61 @@
+// Check that marker symbols ($d, $x) denoting data embedded in code are ignored
+// if they fall outside their respective sections.
+
+// RUN: yaml2obj %S/Inputs/spurious-marker-symbol.yaml -o %t.exe
+// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+// CHECK: 1 out of 1 functions were overwritten
+// RUN: llvm-objdump -j .text -d %t.bolt | FileCheck %s -check-prefix=CHECK-DISASM
+// CHECK-DISASM: func
+// CHECK-DISASM: 2a0000: d2800540   mov
+// CHECK-DISASM: 2a0004: d65f03c0   ret
+
+// The YAML encodes the following assembly and debug information:
+
+  .text
+  .globl func
+  .type func, %function
+func:
+  mov    x0, #42
+// $d.42:    (symbol in .ignored, with an address in .text)
+  ret
+
+// .eh_frame contains minimal DWARF with a CFA operation on the `ret`. BOLT
+// should ignore the spurious `$d.42`. If it doesn't, then it will stop
+// disassembling after the `mov` and will fail to process the second
+// DW_CFA_def_cfa_offset.
+//
+// CIE
+//    length:                       00000010
+//    CIE_id:                       00000000
+//    version:                            01
+//    augmentation:
+//      "zR"                        7a 52 00
+//      - read augmentation data
+//      - read FDE pointer encoding
+//    code_alignment_factor:              04
+//    data_alignment_factor:              78  (-8)
+//    return_address_register:            1e  (r30 / lr)
+//
+//    augmentation data:
+//      length:                           01
+//      FDE pointers are absptr+sdata4    0b
+//
+//    initial_instructions:
+//      DW_CFA_def_cfa (31, 0):     0c 1f 00
+//
+// Encoding: 10000000'00000000'01'7a5200'04'78'1e'01'0b'0c1f00
+//
+// FDE
+//    length:                       00000014
+//    CIE_pointer:                  00000018  (backwards offset from here to CIE)
+//    initial_location:             002a0000  (`func` as absptr+sdata4)
+//    address_range:                00000008
+//    augmentation data:
+//      length:                           00
+//    instructions:
+//      DW_CFA_def_cfa_offset (1)      0e 01
+//      DW_CFA_advance_loc (1)            41  (`ret` at 0x2a0004)
+//      DW_CFA_def_cfa_offset (1)      0e 01  Fails unless $d.42 is ignored.
+//      DW_CFA_nop                     00 00
+//
+// Encoding: 14000000'18000000'00002a00'08000000'000e0141'0e010000



More information about the llvm-commits mailing list