[llvm] 282bf21 - [llvm-nm][MachO] Add support for `MH_FILESET`

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 5 09:51:47 PDT 2023


Author: Antonio Frighetto
Date: 2023-09-05T18:51:12+02:00
New Revision: 282bf213ee2f0fc0c259efa4e9f6283a1f1a2ae1

URL: https://github.com/llvm/llvm-project/commit/282bf213ee2f0fc0c259efa4e9f6283a1f1a2ae1
DIFF: https://github.com/llvm/llvm-project/commit/282bf213ee2f0fc0c259efa4e9f6283a1f1a2ae1.diff

LOG: [llvm-nm][MachO] Add support for `MH_FILESET`

Support printing symbols for MachO files of type `MH_FILESET`.
This is achieved by extending `dumpSymbolNamesFromObject`
to handle filesets, and by adding an offset to the
`MachOObjectFile` class to locate embedded MachO headers.

Differential Revision: https://reviews.llvm.org/D159294

Added: 
    llvm/test/tools/llvm-nm/AArch64/Inputs/fileset.macho-aarch64
    llvm/test/tools/llvm-nm/AArch64/macho-fileset.test

Modified: 
    llvm/include/llvm/BinaryFormat/MachO.h
    llvm/include/llvm/Object/MachO.h
    llvm/include/llvm/Object/ObjectFile.h
    llvm/lib/Object/MachOObjectFile.cpp
    llvm/lib/ObjectYAML/MachOYAML.cpp
    llvm/tools/llvm-nm/llvm-nm.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index 6ce0dec14a3e363..f59cd14c1b5c055 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -898,12 +898,17 @@ struct linker_option_command {
   uint32_t count;
 };
 
+union lc_str {
+  uint32_t offset;
+};
+
 struct fileset_entry_command {
   uint32_t cmd;
   uint32_t cmdsize;
   uint64_t vmaddr;
   uint64_t fileoff;
-  uint32_t entry_id;
+  union lc_str entry_id;
+  uint32_t reserved;
 };
 
 // The symseg_command is obsolete and no longer supported.
@@ -1434,7 +1439,8 @@ inline void swapStruct(fileset_entry_command &C) {
   sys::swapByteOrder(C.cmdsize);
   sys::swapByteOrder(C.vmaddr);
   sys::swapByteOrder(C.fileoff);
-  sys::swapByteOrder(C.entry_id);
+  sys::swapByteOrder(C.entry_id.offset);
+  sys::swapByteOrder(C.reserved);
 }
 
 inline void swapStruct(version_min_command &C) {

diff  --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index 54c876a8cb4f9fc..894252db538f9e7 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -414,7 +414,8 @@ class MachOObjectFile : public ObjectFile {
 
   static Expected<std::unique_ptr<MachOObjectFile>>
   create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
-         uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0);
+         uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0,
+         size_t MachOFilesetEntryOffset = 0);
 
   static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch);
 
@@ -697,6 +698,8 @@ class MachOObjectFile : public ObjectFile {
   getRoutinesCommand64(const LoadCommandInfo &L) const;
   MachO::thread_command
   getThreadCommand(const LoadCommandInfo &L) const;
+  MachO::fileset_entry_command
+  getFilesetEntryLoadCommand(const LoadCommandInfo &L) const;
 
   MachO::any_relocation_info getRelocation(DataRefImpl Rel) const;
   MachO::data_in_code_entry getDice(DataRefImpl Rel) const;
@@ -760,6 +763,8 @@ class MachOObjectFile : public ObjectFile {
 
   bool hasPageZeroSegment() const { return HasPageZeroSegment; }
 
+  size_t getMachOFilesetEntryOffset() const { return MachOFilesetEntryOffset; }
+
   static bool classof(const Binary *v) {
     return v->isMachO();
   }
@@ -839,7 +844,8 @@ class MachOObjectFile : public ObjectFile {
 private:
   MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
                   Error &Err, uint32_t UniversalCputype = 0,
-                  uint32_t UniversalIndex = 0);
+                  uint32_t UniversalIndex = 0,
+                  size_t MachOFilesetEntryOffset = 0);
 
   uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
 
@@ -867,6 +873,7 @@ class MachOObjectFile : public ObjectFile {
   const char *DyldExportsTrieLoadCmd = nullptr;
   const char *UuidLoadCmd = nullptr;
   bool HasPageZeroSegment = false;
+  size_t MachOFilesetEntryOffset = 0;
 };
 
 /// DiceRef

diff  --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 2b614185c694b63..c254fc2ccfde5d9 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -389,9 +389,9 @@ class ObjectFile : public SymbolicFile {
   createELFObjectFile(MemoryBufferRef Object, bool InitContent = true);
 
   static Expected<std::unique_ptr<MachOObjectFile>>
-  createMachOObjectFile(MemoryBufferRef Object,
-                        uint32_t UniversalCputype = 0,
-                        uint32_t UniversalIndex = 0);
+  createMachOObjectFile(MemoryBufferRef Object, uint32_t UniversalCputype = 0,
+                        uint32_t UniversalIndex = 0,
+                        size_t MachOFilesetEntryOffset = 0);
 
   static Expected<std::unique_ptr<ObjectFile>>
   createGOFFObjectFile(MemoryBufferRef Object);

diff  --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 6ca83a955d5a1aa..aa57de16ed18f44 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -108,9 +108,11 @@ getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L,
   return reinterpret_cast<const char*>(SectionAddr);
 }
 
-static const char *getPtr(const MachOObjectFile &O, size_t Offset) {
-  assert(Offset <= O.getData().size());
-  return O.getData().data() + Offset;
+static const char *getPtr(const MachOObjectFile &O, size_t Offset,
+                          size_t MachOFilesetEntryOffset = 0) {
+  assert(Offset <= O.getData().size() &&
+         MachOFilesetEntryOffset <= O.getData().size());
+  return O.getData().data() + Offset + MachOFilesetEntryOffset;
 }
 
 static MachO::nlist_base
@@ -208,7 +210,8 @@ getFirstLoadCommandInfo(const MachOObjectFile &Obj) {
   if (sizeof(MachO::load_command) > Obj.getHeader().sizeofcmds)
     return malformedError("load command 0 extends past the end all load "
                           "commands in the file");
-  return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize), 0);
+  return getLoadCommandInfo(
+      Obj, getPtr(Obj, HeaderSize, Obj.getMachOFilesetEntryOffset()), 0);
 }
 
 static Expected<MachOObjectFile::LoadCommandInfo>
@@ -217,7 +220,8 @@ getNextLoadCommandInfo(const MachOObjectFile &Obj, uint32_t LoadCommandIndex,
   unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64)
                                       : sizeof(MachO::mach_header);
   if (L.Ptr + L.C.cmdsize + sizeof(MachO::load_command) >
-      Obj.getData().data() + HeaderSize + Obj.getHeader().sizeofcmds)
+      Obj.getData().data() + Obj.getMachOFilesetEntryOffset() + HeaderSize +
+          Obj.getHeader().sizeofcmds)
     return malformedError("load command " + Twine(LoadCommandIndex + 1) +
                           " extends past the end all load commands in the file");
   return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize, LoadCommandIndex + 1);
@@ -231,7 +235,8 @@ static void parseHeader(const MachOObjectFile &Obj, T &Header,
                          "file");
     return;
   }
-  if (auto HeaderOrErr = getStructOrErr<T>(Obj, getPtr(Obj, 0)))
+  if (auto HeaderOrErr = getStructOrErr<T>(
+          Obj, getPtr(Obj, 0, Obj.getMachOFilesetEntryOffset())))
     Header = *HeaderOrErr;
   else
     Err = HeaderOrErr.takeError();
@@ -1247,12 +1252,12 @@ static bool isLoadCommandObsolete(uint32_t cmd) {
 Expected<std::unique_ptr<MachOObjectFile>>
 MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian,
                         bool Is64Bits, uint32_t UniversalCputype,
-                        uint32_t UniversalIndex) {
+                        uint32_t UniversalIndex,
+                        size_t MachOFilesetEntryOffset) {
   Error Err = Error::success();
-  std::unique_ptr<MachOObjectFile> Obj(
-      new MachOObjectFile(std::move(Object), IsLittleEndian,
-                          Is64Bits, Err, UniversalCputype,
-                          UniversalIndex));
+  std::unique_ptr<MachOObjectFile> Obj(new MachOObjectFile(
+      std::move(Object), IsLittleEndian, Is64Bits, Err, UniversalCputype,
+      UniversalIndex, MachOFilesetEntryOffset));
   if (Err)
     return std::move(Err);
   return std::move(Obj);
@@ -1261,8 +1266,10 @@ MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian,
 MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
                                  bool Is64bits, Error &Err,
                                  uint32_t UniversalCputype,
-                                 uint32_t UniversalIndex)
-    : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) {
+                                 uint32_t UniversalIndex,
+                                 size_t MachOFilesetEntryOffset)
+    : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
+      MachOFilesetEntryOffset(MachOFilesetEntryOffset) {
   ErrorAsOutParameter ErrAsOutParam(&Err);
   uint64_t SizeOfHeaders;
   uint32_t cputype;
@@ -4761,6 +4768,11 @@ MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const {
   return getStruct<MachO::thread_command>(*this, L.Ptr);
 }
 
+MachO::fileset_entry_command
+MachOObjectFile::getFilesetEntryLoadCommand(const LoadCommandInfo &L) const {
+  return getStruct<MachO::fileset_entry_command>(*this, L.Ptr);
+}
+
 MachO::any_relocation_info
 MachOObjectFile::getRelocation(DataRefImpl Rel) const {
   uint32_t Offset;
@@ -5300,23 +5312,29 @@ bool MachOObjectFile::isRelocatableObject() const {
   return getHeader().filetype == MachO::MH_OBJECT;
 }
 
-Expected<std::unique_ptr<MachOObjectFile>>
-ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer,
-                                  uint32_t UniversalCputype,
-                                  uint32_t UniversalIndex) {
+/// Create a MachOObjectFile instance from a given buffer.
+///
+/// \param Buffer Memory buffer containing the MachO binary data.
+/// \param UniversalCputype CPU type of the MachO within a universal binary.
+/// \param UniversalIndex Index of the MachO within a universal binary.
+/// \param MachOFilesetEntryOffset Offset of the MachO entry in a fileset MachO.
+/// \returns A std::unique_ptr to a MachOObjectFile instance on success.
+Expected<std::unique_ptr<MachOObjectFile>> ObjectFile::createMachOObjectFile(
+    MemoryBufferRef Buffer, uint32_t UniversalCputype, uint32_t UniversalIndex,
+    size_t MachOFilesetEntryOffset) {
   StringRef Magic = Buffer.getBuffer().slice(0, 4);
   if (Magic == "\xFE\xED\xFA\xCE")
-    return MachOObjectFile::create(Buffer, false, false,
-                                   UniversalCputype, UniversalIndex);
+    return MachOObjectFile::create(Buffer, false, false, UniversalCputype,
+                                   UniversalIndex, MachOFilesetEntryOffset);
   if (Magic == "\xCE\xFA\xED\xFE")
-    return MachOObjectFile::create(Buffer, true, false,
-                                   UniversalCputype, UniversalIndex);
+    return MachOObjectFile::create(Buffer, true, false, UniversalCputype,
+                                   UniversalIndex, MachOFilesetEntryOffset);
   if (Magic == "\xFE\xED\xFA\xCF")
-    return MachOObjectFile::create(Buffer, false, true,
-                                   UniversalCputype, UniversalIndex);
+    return MachOObjectFile::create(Buffer, false, true, UniversalCputype,
+                                   UniversalIndex, MachOFilesetEntryOffset);
   if (Magic == "\xCF\xFA\xED\xFE")
-    return MachOObjectFile::create(Buffer, true, true,
-                                   UniversalCputype, UniversalIndex);
+    return MachOObjectFile::create(Buffer, true, true, UniversalCputype,
+                                   UniversalIndex, MachOFilesetEntryOffset);
   return make_error<GenericBinaryError>("Unrecognized MachO magic number",
                                         object_error::invalid_file_type);
 }

diff  --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp
index 56120901be23b71..86342c5501c708f 100644
--- a/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -627,7 +627,8 @@ void MappingTraits<MachO::fileset_entry_command>::mapping(
     IO &IO, MachO::fileset_entry_command &LoadCommand) {
   IO.mapRequired("vmaddr", LoadCommand.vmaddr);
   IO.mapRequired("fileoff", LoadCommand.fileoff);
-  IO.mapRequired("id", LoadCommand.entry_id);
+  IO.mapRequired("id", LoadCommand.entry_id.offset);
+  IO.mapOptional("reserved", LoadCommand.reserved);
 }
 
 } // end namespace yaml

diff  --git a/llvm/test/tools/llvm-nm/AArch64/Inputs/fileset.macho-aarch64 b/llvm/test/tools/llvm-nm/AArch64/Inputs/fileset.macho-aarch64
new file mode 100644
index 000000000000000..b31df0b4c8c5501
Binary files /dev/null and b/llvm/test/tools/llvm-nm/AArch64/Inputs/fileset.macho-aarch64 differ

diff  --git a/llvm/test/tools/llvm-nm/AArch64/macho-fileset.test b/llvm/test/tools/llvm-nm/AArch64/macho-fileset.test
new file mode 100644
index 000000000000000..8e51fa0e2d2fcaa
--- /dev/null
+++ b/llvm/test/tools/llvm-nm/AArch64/macho-fileset.test
@@ -0,0 +1,5 @@
+RUN: llvm-nm %p/Inputs/fileset.macho-aarch64 | FileCheck %s
+
+CHECK:      Symbols for fileset_entry:
+CHECK-NEXT: 0000000000004010 s bar
+CHECK-NEXT: 0000000000004000 s foo

diff  --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index 4aa585291bd7827..129eb895bafeeb0 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/BinaryFormat/XCOFF.h"
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/Function.h"
@@ -29,6 +30,7 @@
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
 #include "llvm/Object/TapiFile.h"
 #include "llvm/Object/TapiUniversal.h"
 #include "llvm/Object/Wasm.h"
@@ -1890,26 +1892,18 @@ static Expected<bool> hasSymbols(SymbolicFile &Obj) {
   return !Obj.symbols().empty();
 }
 
-static void dumpSymbolNamesFromObject(
+static void printSymbolNamesFromObject(
     SymbolicFile &Obj, std::vector<NMSymbol> &SymbolList,
     bool PrintSymbolObject, bool PrintObjectLabel, StringRef ArchiveName = {},
     StringRef ArchitectureName = {}, StringRef ObjectName = {},
     bool PrintArchiveName = true) {
-  if (!shouldDump(Obj))
-    return;
-
-  if (ExportSymbols && Obj.isXCOFF()) {
-    XCOFFObjectFile *XCOFFObj = cast<XCOFFObjectFile>(&Obj);
-    getXCOFFExports(XCOFFObj, SymbolList, ArchiveName);
-    return;
-  }
 
   if (PrintObjectLabel && !ExportSymbols)
     printObjectLabel(PrintArchiveName, ArchiveName, ArchitectureName,
                      ObjectName.empty() ? Obj.getFileName() : ObjectName);
+
   if (!getSymbolNamesFromObject(Obj, SymbolList) || ExportSymbols)
     return;
-  CurrentFilename = Obj.getFileName();
 
   // If there is an error in hasSymbols(), the error should be encountered in
   // function getSymbolNamesFromObject first.
@@ -1923,6 +1917,68 @@ static void dumpSymbolNamesFromObject(
                   ArchitectureName);
 }
 
+static void dumpSymbolsNameFromMachOFilesetEntry(
+    MachOObjectFile *Obj, std::vector<NMSymbol> &SymbolList,
+    bool PrintSymbolObject, bool PrintObjectLabel) {
+  auto Buf = Obj->getMemoryBufferRef();
+  const auto *End = Obj->load_commands().end();
+  for (const auto *It = Obj->load_commands().begin(); It != End; ++It) {
+    const auto &Command = *It;
+    if (Command.C.cmd != MachO::LC_FILESET_ENTRY)
+      continue;
+
+    MachO::fileset_entry_command Entry =
+        Obj->getFilesetEntryLoadCommand(Command);
+    auto MaybeMachO =
+        MachOObjectFile::createMachOObjectFile(Buf, 0, 0, Entry.fileoff);
+
+    if (Error Err = MaybeMachO.takeError())
+      report_fatal_error(std::move(Err));
+
+    const char *EntryName = Command.Ptr + Entry.entry_id.offset;
+    if (EntryName)
+      outs() << "Symbols for " << EntryName << ": \n";
+
+    std::unique_ptr<MachOObjectFile> EntryMachO = std::move(MaybeMachO.get());
+    printSymbolNamesFromObject(*EntryMachO, SymbolList, PrintSymbolObject,
+                               PrintObjectLabel);
+
+    if (std::next(It) != End)
+      outs() << "\n";
+  }
+}
+
+static void dumpSymbolNamesFromObject(
+    SymbolicFile &Obj, std::vector<NMSymbol> &SymbolList,
+    bool PrintSymbolObject, bool PrintObjectLabel, StringRef ArchiveName = {},
+    StringRef ArchitectureName = {}, StringRef ObjectName = {},
+    bool PrintArchiveName = true) {
+  if (!shouldDump(Obj))
+    return;
+
+  if (ExportSymbols && Obj.isXCOFF()) {
+    XCOFFObjectFile *XCOFFObj = cast<XCOFFObjectFile>(&Obj);
+    getXCOFFExports(XCOFFObj, SymbolList, ArchiveName);
+    return;
+  }
+
+  CurrentFilename = Obj.getFileName();
+
+  // Are we handling a MachO of type MH_FILESET?
+  if (Obj.isMachO() && Obj.is64Bit() &&
+      cast<MachOObjectFile>(&Obj)->getHeader64().filetype ==
+          MachO::MH_FILESET) {
+    dumpSymbolsNameFromMachOFilesetEntry(cast<MachOObjectFile>(&Obj),
+                                         SymbolList, PrintSymbolObject,
+                                         PrintObjectLabel);
+    return;
+  }
+
+  printSymbolNamesFromObject(Obj, SymbolList, PrintSymbolObject,
+                             PrintObjectLabel, ArchiveName, ArchitectureName,
+                             ObjectName, PrintArchiveName);
+}
+
 // checkMachOAndArchFlags() checks to see if the SymbolicFile is a Mach-O file
 // and if it is and there is a list of architecture flags is specified then
 // check to make sure this Mach-O file is one of those architectures or all


        


More information about the llvm-commits mailing list