[llvm] 686d8ce - [llvm-objdump] Complete -chained_fixups support

Daniel Bertalan via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 24 10:29:36 PDT 2022


Author: Daniel Bertalan
Date: 2022-08-24T19:29:11+02:00
New Revision: 686d8ce1ab16171c66973e065eada5b6419a0c98

URL: https://github.com/llvm/llvm-project/commit/686d8ce1ab16171c66973e065eada5b6419a0c98
DIFF: https://github.com/llvm/llvm-project/commit/686d8ce1ab16171c66973e065eada5b6419a0c98.diff

LOG: [llvm-objdump] Complete -chained_fixups support

This commit adds definitions for the `dyld_chained_import*` structs.
The imports array is now printed with `llvm-otool -chained_fixups`. This
completes this option's implementation.

A slight difference from cctools otool is that we don't yet dump the
raw bytes of the imports entries.

When Apple's effort to upstream their chained fixups code continues,
we'll replace this code with the then-upstreamed code. But we need
something in the meantime for testing ld64.lld's chained fixups code.

Differential Revision: https://reviews.llvm.org/D131982

Added: 
    

Modified: 
    llvm/include/llvm/BinaryFormat/MachO.h
    llvm/include/llvm/Object/MachO.h
    llvm/lib/Object/MachOObjectFile.cpp
    llvm/test/tools/llvm-objdump/MachO/chained-fixups.test
    llvm/tools/llvm-objdump/MachODump.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index 8fcaeb0eb1b05..a9fc7954d52ce 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -1079,6 +1079,30 @@ struct dyld_chained_starts_in_segment {
                               ///< DYLD_CHAINED_PTR_START_NONE if no fixups
 };
 
+// DYLD_CHAINED_IMPORT
+struct dyld_chained_import {
+  uint32_t lib_ordinal : 8;  // special (negative) ordinals are stored unsigned
+  uint32_t weak_import : 1;  // single-bit flag
+  uint32_t name_offset : 23; // added to the symbols start to find the name
+};
+
+// DYLD_CHAINED_IMPORT_ADDEND
+struct dyld_chained_import_addend {
+  uint32_t lib_ordinal : 8;  // special (negative) ordinals are stored unsigned
+  uint32_t weak_import : 1;  // single-bit flag
+  uint32_t name_offset : 23; // added to the symbols start to find the name
+  int32_t addend;            // signed 32-bit addend
+};
+
+// DYLD_CHAINED_IMPORT_ADDEND64
+struct dyld_chained_import_addend64 {
+  uint64_t lib_ordinal : 16; // special (negative) ordinals are stored unsigned
+  uint64_t weak_import : 1;  // single-bit flag
+  uint64_t reserved : 15;    // pads the first 32 bits
+  uint64_t name_offset : 32; // added to the symbols start to find the name
+  uint64_t addend;           // 64-bit addend
+};
+
 // Byte order swapping functions for MachO structs
 
 inline void swapStruct(fat_header &mh) {

diff  --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index be44388943fa4..ee928c1175a99 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -274,12 +274,13 @@ using bind_iterator = content_iterator<MachOBindEntry>;
 ///     symbol. E.g., C++'s "operator new". This is called a "weak bind."
 struct ChainedFixupTarget {
 public:
-  ChainedFixupTarget(int LibOrdinal, StringRef Symbol, uint64_t Addend,
-                     bool WeakImport)
-      : LibOrdinal(LibOrdinal), SymbolName(Symbol), Addend(Addend),
-        WeakImport(WeakImport) {}
+  ChainedFixupTarget(int LibOrdinal, uint32_t NameOffset, StringRef Symbol,
+                     uint64_t Addend, bool WeakImport)
+      : LibOrdinal(LibOrdinal), NameOffset(NameOffset), SymbolName(Symbol),
+        Addend(Addend), WeakImport(WeakImport) {}
 
   int libOrdinal() { return LibOrdinal; }
+  uint32_t nameOffset() { return NameOffset; }
   StringRef symbolName() { return SymbolName; }
   uint64_t addend() { return Addend; }
   bool weakImport() { return WeakImport; }
@@ -289,6 +290,7 @@ struct ChainedFixupTarget {
 
 private:
   int LibOrdinal;
+  uint32_t NameOffset;
   StringRef SymbolName;
   uint64_t Addend;
   bool WeakImport;

diff  --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index d7e9b57923b8e..0c202b2f189a3 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/bit.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Error.h"
@@ -4924,15 +4925,122 @@ MachOObjectFile::getChainedFixupsSegments() const {
   return std::make_pair(ImageStarts.seg_count, Segments);
 }
 
+// The special library ordinals have a negative value, but they are encoded in
+// an unsigned bitfield, so we need to sign extend the value.
+template <typename T> static int getEncodedOrdinal(T Value) {
+  if (Value == static_cast<T>(MachO::BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) ||
+      Value == static_cast<T>(MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) ||
+      Value == static_cast<T>(MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP))
+    return SignExtend32<sizeof(T) * CHAR_BIT>(Value); // T is the field width
+  return Value; // any other ordinal is returned unchanged
+}
+
+template <typename T, unsigned N>
+static std::array<T, N> getArray(const MachOObjectFile &O, const void *Ptr) {
+  std::array<T, N> RawValue;
+  memcpy(RawValue.data(), Ptr, N * sizeof(T)); // copy N raw elements from Ptr
+  if (O.isLittleEndian() != sys::IsLittleEndianHost) // byte orders differ
+    for (auto &Element : RawValue)
+      sys::swapByteOrder(Element); // swap each element, not the whole array
+  return RawValue;
+}
+
 Expected<std::vector<ChainedFixupTarget>>
 MachOObjectFile::getDyldChainedFixupTargets() const {
+  auto CFOrErr = getChainedFixupsLoadCommand();
+  if (!CFOrErr)
+    return CFOrErr.takeError();
+
+  std::vector<ChainedFixupTarget> Targets;
+  if (!CFOrErr->has_value())
+    return Targets;
+
+  const MachO::linkedit_data_command &DyldChainedFixups = **CFOrErr;
+
   auto CFHeaderOrErr = getChainedFixupsHeader();
   if (!CFHeaderOrErr)
     return CFHeaderOrErr.takeError();
-  std::vector<ChainedFixupTarget> Targets;
   if (!(*CFHeaderOrErr))
     return Targets;
-  return Targets;
+  const MachO::dyld_chained_fixups_header &Header = **CFHeaderOrErr;
+
+  size_t ImportSize = 0;
+  if (Header.imports_format == MachO::DYLD_CHAINED_IMPORT)
+    ImportSize = sizeof(MachO::dyld_chained_import);
+  else if (Header.imports_format == MachO::DYLD_CHAINED_IMPORT_ADDEND)
+    ImportSize = sizeof(MachO::dyld_chained_import_addend);
+  else if (Header.imports_format == MachO::DYLD_CHAINED_IMPORT_ADDEND64)
+    ImportSize = sizeof(MachO::dyld_chained_import_addend64);
+  else
+    return malformedError("bad chained fixups: unknown imports format: " +
+                          Twine(Header.imports_format));
+
+  const char *Contents = getPtr(*this, DyldChainedFixups.dataoff);
+  const char *Imports = Contents + Header.imports_offset;
+  size_t ImportsEndOffset =
+      Header.imports_offset + ImportSize * Header.imports_count;
+  const char *ImportsEnd = Contents + ImportsEndOffset;
+  const char *Symbols = Contents + Header.symbols_offset;
+  const char *SymbolsEnd = Contents + DyldChainedFixups.datasize;
+
+  if (ImportsEnd > SymbolsEnd) // the imports must end within datasize bytes
+    return malformedError("bad chained fixups: imports end " +
+                          Twine(ImportsEndOffset) + " extends past end " +
+                          Twine(DyldChainedFixups.datasize));
+
+  if (ImportsEnd > Symbols) // the imports must end before the symbols start
+    return malformedError("bad chained fixups: imports end " +
+                          Twine(ImportsEndOffset) + " overlaps with symbols");
+
+  // We use bit manipulation to extract data from the bitfields. This is correct
+  // for both LE and BE hosts, but we assume that the object is little-endian.
+  if (!isLittleEndian())
+    return createError("parsing big-endian chained fixups is not implemented");
+  for (const char *ImportPtr = Imports; ImportPtr < ImportsEnd;
+       ImportPtr += ImportSize) {
+    int LibOrdinal;
+    bool WeakImport;
+    uint32_t NameOffset;
+    uint64_t Addend;
+    if (Header.imports_format == MachO::DYLD_CHAINED_IMPORT) {
+      static_assert(sizeof(uint32_t) == sizeof(MachO::dyld_chained_import));
+      auto RawValue = getArray<uint32_t, 1>(*this, ImportPtr);
+
+      LibOrdinal = getEncodedOrdinal<uint8_t>(RawValue[0] & 0xFF);
+      WeakImport = (RawValue[0] >> 8) & 1;
+      NameOffset = RawValue[0] >> 9;
+      Addend = 0; // this format carries no addend
+    } else if (Header.imports_format == MachO::DYLD_CHAINED_IMPORT_ADDEND) {
+      static_assert(sizeof(uint64_t) ==
+                    sizeof(MachO::dyld_chained_import_addend));
+      auto RawValue = getArray<uint32_t, 2>(*this, ImportPtr);
+
+      LibOrdinal = getEncodedOrdinal<uint8_t>(RawValue[0] & 0xFF);
+      WeakImport = (RawValue[0] >> 8) & 1;
+      NameOffset = RawValue[0] >> 9;
+      Addend = bit_cast<int32_t>(RawValue[1]); // signed, then widened
+    } else if (Header.imports_format == MachO::DYLD_CHAINED_IMPORT_ADDEND64) {
+      static_assert(2 * sizeof(uint64_t) ==
+                    sizeof(MachO::dyld_chained_import_addend64));
+      auto RawValue = getArray<uint64_t, 2>(*this, ImportPtr);
+
+      LibOrdinal = getEncodedOrdinal<uint16_t>(RawValue[0] & 0xFFFF);
+      WeakImport = (RawValue[0] >> 16) & 1; // bit 16; bits 17-31 are reserved
+      NameOffset = RawValue[0] >> 32;       // bits 32-63
+      Addend = RawValue[1];
+    } else {
+      llvm_unreachable("Import format should have been checked");
+    }
+
+    const char *Str = Symbols + NameOffset;
+    if (Str >= SymbolsEnd)
+      return malformedError("bad chained fixups: symbol offset " +
+                            Twine(NameOffset) + " extends past end " +
+                            Twine(DyldChainedFixups.datasize));
+    Targets.emplace_back(LibOrdinal, NameOffset, Str, Addend, WeakImport);
+  }
+
+  return std::move(Targets);
 }
 
 ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {

diff  --git a/llvm/test/tools/llvm-objdump/MachO/chained-fixups.test b/llvm/test/tools/llvm-objdump/MachO/chained-fixups.test
index 72460fb8a3405..ed78e2ef4e0a5 100644
--- a/llvm/test/tools/llvm-objdump/MachO/chained-fixups.test
+++ b/llvm/test/tools/llvm-objdump/MachO/chained-fixups.test
@@ -43,8 +43,30 @@ DETAILS-NEXT:     page_start[0] = 0
 DETAILS-NEXT:     page_start[1] = 32
 DETAILS-NEXT:     page_start[2] = 65535 (DYLD_CHAINED_PTR_START_NONE)
 DETAILS-NEXT:     page_start[3] = 32
+DETAILS-NEXT: dyld chained import[0]
+DETAILS-NEXT:   lib_ordinal = -2 (flat-namespace)
+DETAILS-NEXT:   weak_import = 0
+DETAILS-NEXT:   name_offset = 1 (_dynamicLookup)
+DETAILS-NEXT: dyld chained import[1]
+DETAILS-NEXT:   lib_ordinal = 1 (libdylib)
+DETAILS-NEXT:   weak_import = 1
+DETAILS-NEXT:   name_offset = 16 (_weakImport)
+DETAILS-NEXT: dyld chained import[2]
+DETAILS-NEXT:   lib_ordinal = 1 (libdylib)
+DETAILS-NEXT:   weak_import = 0
+DETAILS-NEXT:   name_offset = 28 (_dylib)
+DETAILS-NEXT: dyld chained import[3]
+DETAILS-NEXT:   lib_ordinal = -3 (weak)
+DETAILS-NEXT:   weak_import = 0
+DETAILS-NEXT:   name_offset = 35 (_weakLocal)
+DETAILS-NEXT: dyld chained import[4]
+DETAILS-NEXT:   lib_ordinal = -3 (weak)
+DETAILS-NEXT:   weak_import = 0
+DETAILS-NEXT:   name_offset = 46 (_weak)
 
 ## This test checks that the output is identical to that of cctools-1001.2 (XCode 14)
+## FIXME: Print encoded values of the dyld_chained_import* entries
+##
 ## The input was generated from the following files:
 ##
 ## --- dylib.s:

diff  --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index d95b1c4f5fd3e..de5e33ebcae98 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -94,6 +94,8 @@ static std::vector<std::string> ArchFlags;
 static bool ArchAll = false;
 static std::string ThumbTripleName;
 
+static StringRef ordinalName(const object::MachOObjectFile *, int);
+
 void objdump::parseMachOOptions(const llvm::opt::InputArgList &InputArgs) {
   FirstPrivateHeader = InputArgs.hasArg(OBJDUMP_private_header);
   ExportsTrie = InputArgs.hasArg(OBJDUMP_exports_trie);
@@ -1282,6 +1284,26 @@ PrintChainedFixupsSegment(const MachOObjectFile::ChainedFixupsSegment &Segment,
   }
 }
 
+static void PrintChainedFixupTarget(ChainedFixupTarget &Target, size_t Idx,
+                                    int Format, MachOObjectFile *O) {
+  if (Format == MachO::DYLD_CHAINED_IMPORT)
+    outs() << "dyld chained import";
+  else if (Format == MachO::DYLD_CHAINED_IMPORT_ADDEND)
+    outs() << "dyld chained import addend";
+  else if (Format == MachO::DYLD_CHAINED_IMPORT_ADDEND64)
+    outs() << "dyld chained import addend64";
+  // FIXME: otool prints the encoded value as well.
+  outs() << '[' << Idx << "]\n"; // any other Format prints only the index
+
+  outs() << "  lib_ordinal = " << Target.libOrdinal() << " ("
+         << ordinalName(O, Target.libOrdinal()) << ")\n";
+  outs() << "  weak_import = " << Target.weakImport() << '\n';
+  outs() << "  name_offset = " << Target.nameOffset() << " ("
+         << Target.symbolName() << ")\n";
+  if (Format != MachO::DYLD_CHAINED_IMPORT) // no addend line for this format
+    outs() << "  addend      = " << (int64_t)Target.addend() << '\n';
+}
+
 static void PrintChainedFixups(MachOObjectFile *O) {
   // MachOObjectFile::getChainedFixupsHeader() reads LC_DYLD_CHAINED_FIXUPS.
   // FIXME: Support chained fixups in __TEXT,__chain_starts section too.
@@ -1314,7 +1336,12 @@ static void PrintChainedFixups(MachOObjectFile *O) {
   for (const MachOObjectFile::ChainedFixupsSegment &S : Segments)
     PrintChainedFixupsSegment(S, SegNames[S.SegIdx]);
 
-  // FIXME: Print more things.
+  auto FixupTargets =
+      unwrapOrError(O->getDyldChainedFixupTargets(), O->getFileName());
+
+  uint32_t ImportsFormat = ChainedFixupHeader->imports_format;
+  for (auto [Idx, Target] : enumerate(FixupTargets))
+    PrintChainedFixupTarget(Target, Idx, ImportsFormat, O);
 }
 
 static void PrintDyldInfo(MachOObjectFile *O) {
@@ -10508,6 +10535,8 @@ static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) {
     return "main-executable";
   case MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP:
     return "flat-namespace";
+  case MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP:
+    return "weak";
   default:
     if (Ordinal > 0) {
       std::error_code EC =


        


More information about the llvm-commits mailing list