[llvm-branch-commits] [lld] d6e5bfc - [lld-macho] Support EH frame pointer encodings that use sdata4

Mon Aug 8 15:38:38 PDT 2022

Author: Jez Ng
Date: 2022-08-08T15:36:33-07:00
New Revision: d6e5bfce508af9e4c3f9b7357a00f7c02e94657c

URL: https://github.com/llvm/llvm-project/commit/d6e5bfce508af9e4c3f9b7357a00f7c02e94657c
DIFF: https://github.com/llvm/llvm-project/commit/d6e5bfce508af9e4c3f9b7357a00f7c02e94657c.diff

LOG: [lld-macho] Support EH frame pointer encodings that use sdata4

Previously we only supported using the system pointer size (aka the
`absptr` encoding) because `llvm-mc`'s CFI directives always generate EH
frames with that encoding. But libffi uses 4-byte-encoded, hand-rolled
EH frames, so this patch adds support for it.

Fixes #56576.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D130804

(cherry picked from commit 6c9f6812523a706c11a12e6cb4119b0cf67bbb21)

Added: 
    lld/test/MachO/eh-frame-sdata4.s

Modified: 
    lld/MachO/EhFrame.cpp
    lld/MachO/EhFrame.h
    lld/MachO/InputFiles.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/EhFrame.cpp b/lld/MachO/EhFrame.cpp
index 50d8accc0596e..55a85f316cdd7 100644

--- a/lld/MachO/EhFrame.cpp
+++ b/lld/MachO/EhFrame.cpp
@@ -58,17 +58,17 @@ uint32_t EhReader::readU32(size_t *off) const {
   return v;
 }
 
-uint64_t EhReader::readPointer(size_t *off) const {
-  if (*off + wordSize > data.size())
+uint64_t EhReader::readPointer(size_t *off, uint8_t size) const {
+  if (*off + size > data.size())
     failOn(*off, "unexpected end of CIE/FDE");
   uint64_t v;
-  if (wordSize == 8)
+  if (size == 8)
     v = read64le(data.data() + *off);
   else {
-    assert(wordSize == 4);
+    assert(size == 4);
     v = read32le(data.data() + *off);
   }
-  *off += wordSize;
+  *off += size;
   return v;
 }
 

diff  --git a/lld/MachO/EhFrame.h b/lld/MachO/EhFrame.h
index c8269b941bcfc..609a3bb8b1fe3 100644
--- a/lld/MachO/EhFrame.h
+++ b/lld/MachO/EhFrame.h
@@ -55,9 +55,8 @@ namespace macho {
 
 class EhReader {
 public:
-  EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
-           size_t wordSize)
-      : file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
+  EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff)
+      : file(file), data(data), dataOff(dataOff) {}
   size_t size() const { return data.size(); }
   // Read and validate the length field.
   uint64_t readLength(size_t *off) const;
@@ -65,7 +64,7 @@ class EhReader {
   void skipValidLength(size_t *off) const;
   uint8_t readByte(size_t *off) const;
   uint32_t readU32(size_t *off) const;
-  uint64_t readPointer(size_t *off) const;
+  uint64_t readPointer(size_t *off, uint8_t size) const;
   StringRef readString(size_t *off) const;
   void skipLeb128(size_t *off) const;
   void failOn(size_t errOff, const Twine &msg) const;
@@ -76,7 +75,6 @@ class EhReader {
   // The offset of the data array within its section. Used only for error
   // reporting.
   const size_t dataOff;
-  size_t wordSize;
 };
 
 // The EH frame format, when emitted by llvm-mc, consists of a number of

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index cbb04a3bf04fa..fd0e4ec8834c4 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -385,7 +385,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
 }
 
 void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
-  EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
+  EhReader reader(this, data, /*dataOff=*/0);
   size_t off = 0;
   while (off < reader.size()) {
     uint64_t frameOff = off;
@@ -1290,10 +1290,25 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
 
 struct CIE {
   macho::Symbol *personalitySymbol = nullptr;
-  bool fdesHaveLsda = false;
   bool fdesHaveAug = false;
+  uint8_t lsdaPtrSize = 0; // 0 => no LSDA
+  uint8_t funcPtrSize = 0;
 };
 
+static uint8_t pointerEncodingToSize(uint8_t enc) {
+  switch (enc & 0xf) {
+  case dwarf::DW_EH_PE_absptr:
+    return target->wordSize;
+  case dwarf::DW_EH_PE_sdata4:
+    return 4;
+  case dwarf::DW_EH_PE_sdata8:
+    // ld64 doesn't actually support sdata8, but this seems simple enough...
+    return 8;
+  default:
+    return 0;
+  };
+}
+
 static CIE parseCIE(const InputSection *isec, const EhReader &reader,
                     size_t off) {
   // Handling the full generality of possible DWARF encodings would be a major
@@ -1301,8 +1316,6 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader,
   // DWARF and handle just that.
   constexpr uint8_t expectedPersonalityEnc =
       dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
-  constexpr uint8_t expectedPointerEnc =
-      dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
 
   CIE cie;
   uint8_t version = reader.readByte(&off);
@@ -1329,16 +1342,17 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader,
       break;
     }
     case 'L': {
-      cie.fdesHaveLsda = true;
       uint8_t lsdaEnc = reader.readByte(&off);
-      if (lsdaEnc != expectedPointerEnc)
+      cie.lsdaPtrSize = pointerEncodingToSize(lsdaEnc);
+      if (cie.lsdaPtrSize == 0)
         reader.failOn(off, "unexpected LSDA encoding 0x" +
                                Twine::utohexstr(lsdaEnc));
       break;
     }
     case 'R': {
       uint8_t pointerEnc = reader.readByte(&off);
-      if (pointerEnc != expectedPointerEnc)
+      cie.funcPtrSize = pointerEncodingToSize(pointerEnc);
+      if (cie.funcPtrSize == 0 || !(pointerEnc & dwarf::DW_EH_PE_pcrel))
         reader.failOn(off, "unexpected pointer encoding 0x" +
                                Twine::utohexstr(pointerEnc));
       break;
@@ -1468,7 +1482,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
     else if (isec->symbols[0]->value != 0)
       fatal("found symbol at unexpected offset in __eh_frame");
 
-    EhReader reader(this, isec->data, subsec.offset, target->wordSize);
+    EhReader reader(this, isec->data, subsec.offset);
     size_t dataOff = 0; // Offset from the start of the EH frame.
     reader.skipValidLength(&dataOff); // readLength() already validated this.
     // cieOffOff is the offset from the start of the EH frame to the cieOff
@@ -1507,20 +1521,20 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
       continue;
     }
 
+    assert(cieMap.count(cieIsec));
+    const CIE &cie = cieMap[cieIsec];
     // Offset of the function address within the EH frame.
     const size_t funcAddrOff = dataOff;
-    uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
-                        isecOff + funcAddrOff;
-    uint32_t funcLength = reader.readPointer(&dataOff);
+    uint64_t funcAddr = reader.readPointer(&dataOff, cie.funcPtrSize) +
+                        ehFrameSection.addr + isecOff + funcAddrOff;
+    uint32_t funcLength = reader.readPointer(&dataOff, cie.funcPtrSize);
     size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
-    assert(cieMap.count(cieIsec));
-    const CIE &cie = cieMap[cieIsec];
     Optional<uint64_t> lsdaAddrOpt;
     if (cie.fdesHaveAug) {
       reader.skipLeb128(&dataOff);
       lsdaAddrOff = dataOff;
-      if (cie.fdesHaveLsda) {
-        uint64_t lsdaOff = reader.readPointer(&dataOff);
+      if (cie.lsdaPtrSize != 0) {
+        uint64_t lsdaOff = reader.readPointer(&dataOff, cie.lsdaPtrSize);
         if (lsdaOff != 0) // FIXME possible to test this?
           lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
       }

diff  --git a/lld/test/MachO/eh-frame-sdata4.s b/lld/test/MachO/eh-frame-sdata4.s
new file mode 100644
index 0000000000000..20eb6cfd6e0e3
--- /dev/null
+++ b/lld/test/MachO/eh-frame-sdata4.s
@@ -0,0 +1,80 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+## Test that we correctly handle the sdata4 DWARF pointer encoding. llvm-mc's
+## CFI directives always generate EH frames using the absptr (i.e. system
+## pointer size) encoding, but it is possible to hand-roll your own EH frames
+## that use the sdata4 encoding. For instance, libffi does this.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/sdata4.s -o %t/sdata4.o
+# RUN: %lld -lSystem %t/sdata4.o -o %t/sdata4
+# RUN: llvm-objdump --macho --syms --dwarf=frames %t/sdata4 | FileCheck %s
+
+# CHECK: SYMBOL TABLE:
+# CHECK: [[#%.16x,MAIN:]] g     F __TEXT,__text _main
+
+# CHECK: .eh_frame contents:
+# CHECK: 00000000 00000010 00000000 CIE
+# CHECK:   Format:                DWARF32
+# CHECK:   Version:               1
+# CHECK:   Augmentation:          "zR"
+# CHECK:   Code alignment factor: 1
+# CHECK:   Data alignment factor: 1
+# CHECK:   Return address column: 1
+# CHECK:   Augmentation data:     1B
+# CHECK:   DW_CFA_def_cfa: reg7 +8
+# CHECK:   CFA=reg7+8
+
+# CHECK: 00000014 00000010 00000018 FDE cie=00000000 pc=[[#%x,MAIN]]...[[#%x,MAIN+1]]
+# CHECK:   Format:       DWARF32
+# CHECK:   DW_CFA_GNU_args_size: +16
+# CHECK:   DW_CFA_nop:
+# CHECK:   0x[[#%x,MAIN]]: CFA=reg7+8
+
+#--- sdata4.s
+.globl  _main
+_main:
+  retq
+LmainEnd:
+
+.balign 4
+.section __TEXT,__eh_frame
+# Although we don't reference this EhFrame symbol directly, we must have at
+# least one non-local symbol in this section, otherwise llvm-mc generates bogus
+# subtractor relocations.
+EhFrame:
+LCieHdr:
+  .long LCieEnd - LCieStart
+LCieStart:
+  .long 0           # CIE ID
+  .byte 1           # CIE version
+  .ascii "zR\0"
+  .byte 1           # Code alignment
+  .byte 1           # Data alignment
+  .byte 1           # RA column
+  .byte 1           # Augmentation size
+  .byte 0x1b        # FDE pointer encoding (pcrel | sdata4)
+  .byte 0xc, 7, 8   # DW_CFA_def_cfa reg7 +8
+  .balign 4
+LCieEnd:
+
+LFdeHdr:
+  .long LFdeEnd - LFdeStart
+LFdeStart:
+  .long LFdeStart - LCieHdr
+  # The next two fields are longs instead of quads because of the sdata4
+  # encoding.
+  .long _main - .        # Function address
+  .long LmainEnd - _main # Function length
+  .byte 0
+  ## Insert DW_CFA_GNU_args_size to prevent ld64 from creating a compact unwind
+  ## entry to replace this FDE. Makes it easier for us to cross-check behavior
+  ## across the two linkers (LLD never bothers trying to synthesize compact
+  ## unwind if it is not already present).
+  .byte 0x2e, 0x10       # DW_CFA_GNU_args_size
+  .balign 4
+LFdeEnd:
+
+  .long 0 # terminator
+
+.subsections_via_symbols