[llvm] 8bd8dd1 - Extend obj2yaml to optionally preserve raw __LINKEDIT/__DATA segments.

Adrian Prantl via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 8 11:30:18 PST 2021


Author: Adrian Prantl
Date: 2021-11-08T11:30:12-08:00
New Revision: 8bd8dd16e22b7a8f4df0f51ae3511b887629be57

URL: https://github.com/llvm/llvm-project/commit/8bd8dd16e22b7a8f4df0f51ae3511b887629be57
DIFF: https://github.com/llvm/llvm-project/commit/8bd8dd16e22b7a8f4df0f51ae3511b887629be57.diff

LOG: Extend obj2yaml to optionally preserve raw __LINKEDIT/__DATA segments.

I am planning to upstream MachOObjectFile code to support Darwin
chained fixups. In order to test the new parser features we need a way
to produce correct (and incorrect) chained fixups. Right now the only
tool that can produce them is the Darwin linker. To avoid having to
check in binary files, this patch allows obj2yaml to print a hexdump
of the raw __LINKEDIT and __DATA segments, which both allows us to
bootstrap the parser and enables us to easily create malformed inputs
to test error handling in the parser.

This patch adds a new option to obj2yaml, which may be repeated to
select each segment to preserve:

  --raw-segment=data
  --raw-segment=linkedit

Differential Revision: https://reviews.llvm.org/D113234

Added: 
    llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml

Modified: 
    llvm/include/llvm/Object/MachO.h
    llvm/include/llvm/ObjectYAML/MachOYAML.h
    llvm/lib/Object/MachOObjectFile.cpp
    llvm/lib/ObjectYAML/MachOEmitter.cpp
    llvm/lib/ObjectYAML/MachOYAML.cpp
    llvm/tools/obj2yaml/macho2yaml.cpp
    llvm/tools/obj2yaml/obj2yaml.cpp
    llvm/tools/obj2yaml/obj2yaml.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index d2ad12e98deb8..ca5d63e4074f0 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -311,6 +311,9 @@ class MachOObjectFile : public ObjectFile {
   bool isSectionBitcode(DataRefImpl Sec) const override;
   bool isDebugSection(DataRefImpl Sec) const override;
 
+  /// Return the raw contents of an entire segment.
+  ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const;
+
   /// When dsymutil generates the companion file, it strips all unnecessary
   /// sections (e.g. everything in the _TEXT segment) by omitting their body
   /// and setting the offset in their corresponding load command to zero.

diff  --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h
index 5d1d3ee235948..ee89f4eac61f5 100644
--- a/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -131,6 +131,7 @@ struct Object {
   std::vector<LoadCommand> LoadCommands;
   std::vector<Section> Sections;
   LinkEditData LinkEdit;
+  Optional<llvm::yaml::BinaryRef> RawLinkEditSegment;
   DWARFYAML::Data DWARF;
 };
 

diff  --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index e84defb6786e8..7501661591f06 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -2048,6 +2048,46 @@ bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const {
          SectionName == "__swift_ast";
 }
 
+namespace {
+template <typename LoadCommandType>
+ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj,
+                                     MachOObjectFile::LoadCommandInfo LoadCmd,
+                                     StringRef SegmentName) {
+  auto SegmentOrErr = getStructOrErr<LoadCommandType>(Obj, LoadCmd.Ptr);
+  if (!SegmentOrErr) {
+    consumeError(SegmentOrErr.takeError());
+    return {};
+  }
+  auto &Segment = SegmentOrErr.get();
+  if (StringRef(Segment.segname, 16).startswith(SegmentName))
+    return arrayRefFromStringRef(Obj.getData().slice(
+        Segment.fileoff, Segment.fileoff + Segment.filesize));
+  return {};
+}
+} // namespace
+
+ArrayRef<uint8_t>
+MachOObjectFile::getSegmentContents(StringRef SegmentName) const {
+  for (auto LoadCmd : load_commands()) {
+    ArrayRef<uint8_t> Contents;
+    switch (LoadCmd.C.cmd) {
+    case MachO::LC_SEGMENT:
+      Contents = ::getSegmentContents<MachO::segment_command>(*this, LoadCmd,
+                                                              SegmentName);
+      break;
+    case MachO::LC_SEGMENT_64:
+      Contents = ::getSegmentContents<MachO::segment_command_64>(*this, LoadCmd,
+                                                                 SegmentName);
+      break;
+    default:
+      continue;
+    }
+    if (!Contents.empty())
+      return Contents;
+  }
+  return {};
+}
+
 unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
   return Sec.getRawDataRefImpl().d.a;
 }

diff  --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp
index 63179ae614005..c653c29ec9a74 100644
--- a/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -288,6 +288,7 @@ void MachOWriter::writeLoadCommands(raw_ostream &OS) {
 }
 
 Error MachOWriter::writeSectionData(raw_ostream &OS) {
+  uint64_t LinkEditOff = 0;
   for (auto &LC : Obj.LoadCommands) {
     switch (LC.Data.load_command_data.cmd) {
     case MachO::LC_SEGMENT:
@@ -297,6 +298,9 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
       if (0 ==
           strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) {
         FoundLinkEditSeg = true;
+        LinkEditOff = segOff;
+        if (Obj.RawLinkEditSegment)
+          continue;
         writeLinkEditData(OS);
       }
       for (auto &Sec : LC.Sections) {
@@ -344,6 +348,13 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
     }
   }
 
+  if (Obj.RawLinkEditSegment) {
+    ZeroToOffset(OS, LinkEditOff);
+    if (OS.tell() - fileStart > LinkEditOff || !LinkEditOff)
+      return createStringError(errc::invalid_argument,
+                               "section offsets don't line up");
+    Obj.RawLinkEditSegment->writeAsBinary(OS);
+  }
   return Error::success();
 }
 

diff  --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp
index 757e46cefc406..c9562bd72258a 100644
--- a/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -110,6 +110,9 @@ void MappingTraits<MachOYAML::Object>::mapping(IO &IO,
   Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 ||
                                  Object.Header.magic == MachO::MH_CIGAM_64;
   IO.mapOptional("LoadCommands", Object.LoadCommands);
+
+  if (Object.RawLinkEditSegment || !IO.outputting())
+    IO.mapOptional("__LINKEDIT", Object.RawLinkEditSegment);
   if(!Object.LinkEdit.isEmpty() || !IO.outputting())
     IO.mapOptional("LinkEditData", Object.LinkEdit);
 

diff  --git a/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml b/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml
new file mode 100644
index 0000000000000..b9d0cae8ce8a1
--- /dev/null
+++ b/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml
@@ -0,0 +1,184 @@
+# Test that obj2yaml + yaml2obj can round-trip mach-o executables with
+#  raw __LINKEDIT segments.
+#
+# RUN: yaml2obj %s | obj2yaml --raw-segment=data --raw-segment=linkedit | FileCheck %s
+#
+# This file was produced using:
+#   echo "int ext;" > a.c
+#   xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o a.o a.c -c
+#   xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib a.o -o liba.dylib -install_name @executable_path/liba.dylib
+#   echo "extern int ext;" > b.c
+#   echo "int padding;" >> b.c
+#   echo "int *p = &ext + 4;" >> b.c
+#   xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o b.o b.c -c
+#   xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib b.o -o libfixups.dylib -install_name @executable_path/libfixups.dylib -L. -la
+#
+# CHECK:      - sectname:        __data
+# CHECK:        segname:         __DATA
+# CHECK:        content:         '0000001000000080'
+# CHECK: __LINKEDIT:      0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x100000C
+  cpusubtype:      0x0
+  filetype:        0x6
+  ncmds:           16
+  sizeofcmds:      816
+  flags:           0x100085
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __TEXT
+    vmaddr:          0
+    vmsize:          16384
+    fileoff:         0
+    filesize:        16384
+    maxprot:         5
+    initprot:        5
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x4000
+        size:            0
+        offset:          0x4000
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         ''
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __DATA
+    vmaddr:          16384
+    vmsize:          16384
+    fileoff:         16384
+    filesize:        16384
+    maxprot:         3
+    initprot:        3
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __data
+        segname:         __DATA
+        addr:            0x4000
+        size:            8
+        offset:          0x4000
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         '0000001000000080'
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __LINKEDIT
+    vmaddr:          32768
+    vmsize:          16384
+    fileoff:         32768
+    filesize:        160
+    maxprot:         1
+    initprot:        1
+    nsects:          0
+    flags:           0
+  - cmd:             LC_ID_DYLIB
+    cmdsize:         64
+    dylib:
+      name:            24
+      timestamp:       1
+      current_version: 0
+      compatibility_version: 0
+    Content:         '@executable_path/libfixups.dylib'
+    ZeroPadBytes:    8
+  - cmd:             LC_DYLD_CHAINED_FIXUPS
+    cmdsize:         16
+    dataoff:         32768
+    datasize:        88
+  - cmd:             LC_DYLD_EXPORTS_TRIE
+    cmdsize:         16
+    dataoff:         32856
+    datasize:        16
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          32880
+    nsyms:           2
+    stroff:          32912
+    strsize:         16
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       0
+    iextdefsym:      0
+    nextdefsym:      1
+    iundefsym:       1
+    nundefsym:       1
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_UUID
+    cmdsize:         24
+    uuid:            56F7BCE0-C1A7-38E3-A90D-742D8E3D5FA9
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         32
+    platform:        2
+    minos:           983296
+    sdk:             983552
+    ntools:          1
+    Tools:
+      - tool:            3
+        version:         46596096
+  - cmd:             LC_SOURCE_VERSION
+    cmdsize:         16
+    version:         0
+  - cmd:             LC_ENCRYPTION_INFO_64
+    cmdsize:         24
+    cryptoff:        16384
+    cryptsize:       0
+    cryptid:         0
+    pad:             0
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 0
+      compatibility_version: 0
+    Content:         '@executable_path/liba.dylib'
+    ZeroPadBytes:    5
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 85917696
+      compatibility_version: 65536
+    Content:         '/usr/lib/libSystem.B.dylib'
+    ZeroPadBytes:    6
+  - cmd:             LC_FUNCTION_STARTS
+    cmdsize:         16
+    dataoff:         32872
+    datasize:        8
+  - cmd:             LC_DATA_IN_CODE
+    cmdsize:         16
+    dataoff:         32880
+    datasize:        0
+__LINKEDIT:      0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
+...

diff  --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp
index b7289bff67ed5..d3b4bf1bf8cc1 100644
--- a/llvm/tools/obj2yaml/macho2yaml.cpp
+++ b/llvm/tools/obj2yaml/macho2yaml.cpp
@@ -29,6 +29,7 @@ class MachODumper {
 
   const object::MachOObjectFile &Obj;
   std::unique_ptr<DWARFContext> DWARFCtx;
+  unsigned RawSegments;
   void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
   Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
   void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
@@ -52,8 +53,8 @@ class MachODumper {
 
 public:
   MachODumper(const object::MachOObjectFile &O,
-              std::unique_ptr<DWARFContext> DCtx)
-      : Obj(O), DWARFCtx(std::move(DCtx)) {}
+              std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
+      : Obj(O), DWARFCtx(std::move(DCtx)), RawSegments(RawSegments) {}
   Expected<std::unique_ptr<MachOYAML::Object>> dump();
 };
 
@@ -176,6 +177,13 @@ Expected<const char *> MachODumper::extractSections(
     if (Expected<MachOYAML::Section> S =
             constructSection(Sec, Sections.size() + 1)) {
       StringRef SecName(S->sectname);
+
+      // Copy data sections if requested.
+      if ((RawSegments & RawSegments::data) &&
+          StringRef(S->segname).startswith("__DATA"))
+        S->content =
+            yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
+
       if (SecName.startswith("__debug_")) {
         // If the DWARF section cannot be successfully parsed, emit raw content
         // instead of an entry in the DWARF section of the YAML.
@@ -282,7 +290,11 @@ Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
   dumpHeader(Y);
   if (Error Err = dumpLoadCommands(Y))
     return std::move(Err);
-  dumpLinkEdit(Y);
+  if (RawSegments & RawSegments::linkedit)
+    Y->RawLinkEditSegment =
+        yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
+  else
+    dumpLinkEdit(Y);
 
   return std::move(Y);
 }
@@ -587,9 +599,10 @@ void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
   }
 }
 
-Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
+Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
+                 unsigned RawSegments) {
   std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
-  MachODumper Dumper(Obj, std::move(DCtx));
+  MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
   Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
   if (!YAML)
     return YAML.takeError();
@@ -602,7 +615,8 @@ Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
   return Error::success();
 }
 
-Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
+Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
+                 unsigned RawSegments) {
   yaml::YamlObjectFile YAMLFile;
   YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
   MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
@@ -624,7 +638,7 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
       return SliceObj.takeError();
 
     std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
-    MachODumper Dumper(*SliceObj.get(), std::move(DCtx));
+    MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
     Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
     if (!YAMLObj)
       return YAMLObj.takeError();
@@ -636,12 +650,13 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
   return Error::success();
 }
 
-Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) {
+Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
+                 unsigned RawSegments) {
   if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
-    return macho2yaml(Out, *MachOObj);
+    return macho2yaml(Out, *MachOObj, RawSegments);
 
   if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
-    return macho2yaml(Out, *MachOObj);
+    return macho2yaml(Out, *MachOObj, RawSegments);
 
   llvm_unreachable("unexpected Mach-O file format");
 }

diff  --git a/llvm/tools/obj2yaml/obj2yaml.cpp b/llvm/tools/obj2yaml/obj2yaml.cpp
index e9e47d1a2b186..9c7a3385850db 100644
--- a/llvm/tools/obj2yaml/obj2yaml.cpp
+++ b/llvm/tools/obj2yaml/obj2yaml.cpp
@@ -1,4 +1,4 @@
-//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===//
+//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -----------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -18,6 +18,14 @@
 using namespace llvm;
 using namespace llvm::object;
 
+static cl::opt<std::string>
+    InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+static cl::bits<RawSegments> RawSegment(
+    "raw-segment",
+    cl::desc("Mach-O: dump the raw contents of the listed segments instead of "
+             "parsing them:"),
+    cl::values(clEnumVal(data, "__DATA"), clEnumVal(linkedit, "__LINKEDIT")));
+
 static Error dumpObject(const ObjectFile &Obj) {
   if (Obj.isCOFF())
     return errorCodeToError(coff2yaml(outs(), cast<COFFObjectFile>(Obj)));
@@ -54,7 +62,7 @@ static Error dumpInput(StringRef File) {
   // Universal MachO is not a subclass of ObjectFile, so it needs to be handled
   // here with the other binary types.
   if (Binary.isMachO() || Binary.isMachOUniversalBinary())
-    return macho2yaml(outs(), Binary);
+    return macho2yaml(outs(), Binary, RawSegment.getBits());
   if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
     return dumpObject(*Obj);
   if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary))
@@ -74,9 +82,6 @@ static void reportError(StringRef Input, Error Err) {
   errs().flush();
 }
 
-cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"),
-                                   cl::init("-"));
-
 int main(int argc, char *argv[]) {
   InitLLVM X(argc, argv);
   cl::ParseCommandLineOptions(argc, argv);

diff  --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h
index fdd9b2a00185c..c026482eaf0cf 100644
--- a/llvm/tools/obj2yaml/obj2yaml.h
+++ b/llvm/tools/obj2yaml/obj2yaml.h
@@ -20,12 +20,13 @@
 #include "llvm/Support/MemoryBufferRef.h"
 #include <system_error>
 
+enum RawSegments : unsigned { none = 0, data = 1, linkedit = 1 << 1 };
 std::error_code coff2yaml(llvm::raw_ostream &Out,
                           const llvm::object::COFFObjectFile &Obj);
 llvm::Error elf2yaml(llvm::raw_ostream &Out,
                          const llvm::object::ObjectFile &Obj);
-llvm::Error macho2yaml(llvm::raw_ostream &Out,
-                           const llvm::object::Binary &Obj);
+llvm::Error macho2yaml(llvm::raw_ostream &Out, const llvm::object::Binary &Obj,
+                       unsigned RawSegments);
 llvm::Error minidump2yaml(llvm::raw_ostream &Out,
                           const llvm::object::MinidumpFile &Obj);
 llvm::Error xcoff2yaml(llvm::raw_ostream &Out,


        


More information about the llvm-commits mailing list