[lld] 9065fe5 - [lld-macho] Refactor archive loading

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 26 15:57:36 PDT 2021


Author: Jez Ng
Date: 2021-08-26T18:52:07-04:00
New Revision: 9065fe55911923c158e0d0e739081996d4ff86f2

URL: https://github.com/llvm/llvm-project/commit/9065fe55911923c158e0d0e739081996d4ff86f2
DIFF: https://github.com/llvm/llvm-project/commit/9065fe55911923c158e0d0e739081996d4ff86f2.diff

LOG: [lld-macho] Refactor archive loading

The previous logic was duplicated between symbol-initiated
archive loads versus flag-initiated loads (i.e. `-force_load` and
`-ObjC`). This resulted in code duplication as well as redundant work --
we would create Archive instances twice whenever we had one of those
flags; once in `getArchiveMembers` and again when we constructed the
ArchiveFile.

This was motivated by an upcoming diff where we load archive members
containing ObjC-related symbols before loading those containing
ObjC-related sections, as well as before performing symbol resolution.
Without this refactor, it would be difficult to do that while avoiding
loading the same archive member twice.

Differential Revision: https://reviews.llvm.org/D108780

Added: 
    

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/Driver.h
    lld/MachO/DriverUtils.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/ObjC.cpp
    lld/test/MachO/invalid/bad-archive-member.s
    lld/test/MachO/thin-archive.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 3098e6dac0d6c..b5458dcc89c2f 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -224,49 +224,6 @@ static llvm::CachePruningPolicy getLTOCachePolicy(InputArgList &args) {
   return CHECK(parseCachePruningPolicy(ltoPolicy), "invalid LTO cache policy");
 }
 
-namespace {
-struct ArchiveMember {
-  MemoryBufferRef mbref;
-  uint32_t modTime;
-  uint64_t offsetInArchive;
-};
-} // namespace
-
-// Returns slices of MB by parsing MB as an archive file.
-// Each slice consists of a member file in the archive.
-static std::vector<ArchiveMember> getArchiveMembers(MemoryBufferRef mb) {
-  std::unique_ptr<Archive> file =
-      CHECK(Archive::create(mb),
-            mb.getBufferIdentifier() + ": failed to parse archive");
-  Archive *archive = file.get();
-  make<std::unique_ptr<Archive>>(std::move(file)); // take ownership
-
-  std::vector<ArchiveMember> v;
-  Error err = Error::success();
-
-  // Thin archives refer to .o files, so --reproduce needs the .o files too.
-  bool addToTar = archive->isThin() && tar;
-
-  for (const Archive::Child &c : archive->children(err)) {
-    MemoryBufferRef mbref =
-        CHECK(c.getMemoryBufferRef(),
-              mb.getBufferIdentifier() +
-                  ": could not get the buffer for a child of the archive");
-    if (addToTar)
-      tar->append(relativeToRoot(check(c.getFullName())), mbref.getBuffer());
-    uint32_t modTime = toTimeT(
-        CHECK(c.getLastModified(), mb.getBufferIdentifier() +
-                                       ": could not get the modification "
-                                       "time for a child of the archive"));
-    v.push_back({mbref, modTime, c.getChildOffset()});
-  }
-  if (err)
-    fatal(mb.getBufferIdentifier() +
-          ": Archive::children failed: " + toString(std::move(err)));
-
-  return v;
-}
-
 static DenseMap<StringRef, ArchiveFile *> loadedArchives;
 
 static InputFile *addFile(StringRef path, bool forceLoadArchive,
@@ -289,48 +246,52 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive,
     if (ArchiveFile *cachedFile = loadedArchives[path])
       return cachedFile;
 
-    std::unique_ptr<object::Archive> file = CHECK(
+    std::unique_ptr<object::Archive> archive = CHECK(
         object::Archive::create(mbref), path + ": failed to parse archive");
 
-    if (!file->isEmpty() && !file->hasSymbolTable())
+    if (!archive->isEmpty() && !archive->hasSymbolTable())
       error(path + ": archive has no index; run ranlib to add one");
 
+    auto *file = make<ArchiveFile>(std::move(archive));
     if (config->allLoad || forceLoadArchive) {
       if (Optional<MemoryBufferRef> buffer = readFile(path)) {
-        for (const ArchiveMember &member : getArchiveMembers(*buffer)) {
-          if (Optional<InputFile *> file = loadArchiveMember(
-                  member.mbref, member.modTime, path, /*objCOnly=*/false,
-                  member.offsetInArchive)) {
-            inputFiles.insert(*file);
-            printArchiveMemberLoad(
-                (forceLoadArchive ? "-force_load" : "-all_load"),
-                inputFiles.back());
-          }
+        Error e = Error::success();
+        for (const object::Archive::Child &c : file->getArchive().children(e)) {
+          StringRef reason = forceLoadArchive ? "-force_load" : "-all_load";
+          if (Error e = file->fetch(c, reason))
+            error(toString(file) + ": " + reason +
+                  " failed to load archive member: " + toString(std::move(e)));
         }
+        if (e)
+          error(toString(file) +
+                ": Archive::children failed: " + toString(std::move(e)));
       }
     } else if (config->forceLoadObjC) {
-      for (const object::Archive::Symbol &sym : file->symbols())
+      for (const object::Archive::Symbol &sym : file->getArchive().symbols())
         if (sym.getName().startswith(objc::klass))
           symtab->addUndefined(sym.getName(), /*file=*/nullptr,
                                /*isWeakRef=*/false);
 
       // TODO: no need to look for ObjC sections for a given archive member if
-      // we already found that it contains an ObjC symbol. We should also
-      // consider creating a LazyObjFile class in order to avoid double-loading
-      // these files here and below (as part of the ArchiveFile).
+      // we already found that it contains an ObjC symbol.
       if (Optional<MemoryBufferRef> buffer = readFile(path)) {
-        for (const ArchiveMember &member : getArchiveMembers(*buffer)) {
-          if (Optional<InputFile *> file = loadArchiveMember(
-                  member.mbref, member.modTime, path, /*objCOnly=*/true,
-                  member.offsetInArchive)) {
-            inputFiles.insert(*file);
-            printArchiveMemberLoad("-ObjC", inputFiles.back());
-          }
+        Error e = Error::success();
+        for (const object::Archive::Child &c : file->getArchive().children(e)) {
+          Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+          if (!mb || !hasObjCSection(*mb))
+            continue;
+          if (Error e = file->fetch(c, "-ObjC"))
+            error(toString(file) + ": -ObjC failed to load archive member: " +
+                  toString(std::move(e)));
         }
+        if (e)
+          error(toString(file) +
+                ": Archive::children failed: " + toString(std::move(e)));
       }
     }
 
-    newFile = loadedArchives[path] = make<ArchiveFile>(std::move(file));
+    file->addLazySymbols();
+    newFile = loadedArchives[path] = file;
     break;
   }
   case file_magic::macho_object:

diff  --git a/lld/MachO/Driver.h b/lld/MachO/Driver.h
index 10f307e780c27..b2d5721290e7e 100644
--- a/lld/MachO/Driver.h
+++ b/lld/MachO/Driver.h
@@ -68,11 +68,6 @@ findPathCombination(const llvm::Twine &name,
 // rerooted.
 llvm::StringRef rerootPath(llvm::StringRef path);
 
-llvm::Optional<InputFile *> loadArchiveMember(MemoryBufferRef, uint32_t modTime,
-                                              StringRef archiveName,
-                                              bool objCOnly,
-                                              uint64_t offsetInArchive);
-
 uint32_t getModTime(llvm::StringRef path);
 
 void printArchiveMemberLoad(StringRef reason, const InputFile *);

diff  --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp
index fc25182d91405..0cdea438b44ad 100644
--- a/lld/MachO/DriverUtils.cpp
+++ b/lld/MachO/DriverUtils.cpp
@@ -18,7 +18,6 @@
 #include "lld/Common/Reproduce.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
@@ -277,30 +276,6 @@ StringRef macho::rerootPath(StringRef path) {
   return path;
 }
 
-Optional<InputFile *> macho::loadArchiveMember(MemoryBufferRef mb,
-                                               uint32_t modTime,
-                                               StringRef archiveName,
-                                               bool objCOnly,
-                                               uint64_t offsetInArchive) {
-  if (config->zeroModTime)
-    modTime = 0;
-
-  switch (identify_magic(mb.getBuffer())) {
-  case file_magic::macho_object:
-    if (!objCOnly || hasObjCSection(mb))
-      return make<ObjFile>(mb, modTime, archiveName);
-    return None;
-  case file_magic::bitcode:
-    if (!objCOnly || check(isBitcodeContainingObjCCategory(mb)))
-      return make<BitcodeFile>(mb, archiveName, offsetInArchive);
-    return None;
-  default:
-    error(archiveName + ": archive member " + mb.getBufferIdentifier() +
-          " has unhandled file type");
-    return None;
-  }
-}
-
 uint32_t macho::getModTime(StringRef path) {
   if (config->zeroModTime)
     return 0;

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 8d64f2731d8d8..aaa914e21c365 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1233,47 +1233,75 @@ void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
 }
 
 ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f)
-    : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
+    : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {}
+
+void ArchiveFile::addLazySymbols() {
   for (const object::Archive::Symbol &sym : file->symbols())
     symtab->addLazy(sym.getName(), this, sym);
 }
 
-void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
-  object::Archive::Child c =
-      CHECK(sym.getMember(), toString(this) +
-                                 ": could not get the member for symbol " +
-                                 toMachOString(sym));
+static Expected<InputFile *> loadArchiveMember(MemoryBufferRef mb,
+                                               uint32_t modTime,
+                                               StringRef archiveName,
+                                               uint64_t offsetInArchive) {
+  if (config->zeroModTime)
+    modTime = 0;
+
+  switch (identify_magic(mb.getBuffer())) {
+  case file_magic::macho_object:
+    return make<ObjFile>(mb, modTime, archiveName);
+  case file_magic::bitcode:
+    return make<BitcodeFile>(mb, archiveName, offsetInArchive);
+  default:
+    return createStringError(inconvertibleErrorCode(),
+                             mb.getBufferIdentifier() +
+                                 " has unhandled file type");
+  }
+}
 
+Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
   if (!seen.insert(c.getChildOffset()).second)
-    return;
+    return Error::success();
 
-  MemoryBufferRef mb =
-      CHECK(c.getMemoryBufferRef(),
-            toString(this) +
-                ": could not get the buffer for the member defining symbol " +
-                toMachOString(sym));
+  Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+  if (!mb)
+    return mb.takeError();
 
+  // Thin archives refer to .o files, so --reproduce needs the .o files too.
   if (tar && c.getParent()->isThin())
-    tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer());
+    tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb->getBuffer());
+
+  Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
+  if (!modTime)
+    return modTime.takeError();
 
-  uint32_t modTime = toTimeT(
-      CHECK(c.getLastModified(), toString(this) +
-                                     ": could not get the modification time "
-                                     "for the member defining symbol " +
-                                     toMachOString(sym)));
+  Expected<InputFile *> file =
+      loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset());
+
+  if (!file)
+    return file.takeError();
+
+  inputFiles.insert(*file);
+  printArchiveMemberLoad(reason, *file);
+  return Error::success();
+}
+
+void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
+  object::Archive::Child c =
+      CHECK(sym.getMember(), toString(this) +
+                                 ": could not get the member defining symbol " +
+                                 toMachOString(sym));
 
   // `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
   // and become invalid after that call. Copy it to the stack so we can refer
   // to it later.
   const object::Archive::Symbol symCopy = sym;
 
-  if (Optional<InputFile *> file = loadArchiveMember(
-          mb, modTime, getName(), /*objCOnly=*/false, c.getChildOffset())) {
-    inputFiles.insert(*file);
-    // ld64 doesn't demangle sym here even with -demangle.
-    // Match that: intentionally don't call toMachOString().
-    printArchiveMemberLoad(symCopy.getName(), *file);
-  }
+  // ld64 doesn't demangle sym here even with -demangle.
+  // Match that: intentionally don't call toMachOString().
+  if (Error e = fetch(c, symCopy.getName()))
+    error(toString(this) + ": could not get the member defining symbol " +
+          toMachOString(symCopy) + ": " + toString(std::move(e)));
 }
 
 static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 0101fb71c8a38..c6b3a3b5022b4 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -184,8 +184,13 @@ class DylibFile final : public InputFile {
 class ArchiveFile final : public InputFile {
 public:
   explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
+  void addLazySymbols();
+  void fetch(const llvm::object::Archive::Symbol &);
+  // LLD normally doesn't use Error for error-handling, but the underlying
+  // Archive library does, so this is the cleanest way to wrap it.
+  Error fetch(const llvm::object::Archive::Child &, StringRef reason);
+  const llvm::object::Archive &getArchive() const { return *file; };
   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
-  void fetch(const llvm::object::Archive::Symbol &sym);
 
 private:
   std::unique_ptr<llvm::object::Archive> file;

diff  --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 7ed800827f3e1..ab9aaecb621c1 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -13,13 +13,14 @@
 #include "Target.h"
 
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Bitcode/BitcodeReader.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
 using namespace lld;
 using namespace lld::macho;
 
-template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
+template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
   using Section = typename LP::section;
 
   auto *hdr =
@@ -46,9 +47,20 @@ template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
   return false;
 }
 
-bool macho::hasObjCSection(MemoryBufferRef mb) {
+static bool objectHasObjCSection(MemoryBufferRef mb) {
   if (target->wordSize == 8)
-    return ::hasObjCSection<LP64>(mb);
+    return ::objectHasObjCSection<LP64>(mb);
   else
-    return ::hasObjCSection<ILP32>(mb);
+    return ::objectHasObjCSection<ILP32>(mb);
+}
+
+bool macho::hasObjCSection(MemoryBufferRef mb) {
+  switch (identify_magic(mb.getBuffer())) {
+  case file_magic::macho_object:
+    return objectHasObjCSection(mb);
+  case file_magic::bitcode:
+    return check(isBitcodeContainingObjCCategory(mb));
+  default:
+    return false;
+  }
 }

diff  --git a/lld/test/MachO/invalid/bad-archive-member.s b/lld/test/MachO/invalid/bad-archive-member.s
index d8bb4092153c8..418129d5dfefe 100644
--- a/lld/test/MachO/invalid/bad-archive-member.s
+++ b/lld/test/MachO/invalid/bad-archive-member.s
@@ -4,10 +4,14 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
 # RUN: %lld -dylib -lSystem %t/foo.o -o %t/foo.dylib
 # RUN: llvm-ar rcs %t/foo.a %t/foo.dylib
-# RUN: not %lld %t/test.o %t/foo.a -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/foo.a
-# RUN: not %lld %t/test.o -force_load %t/foo.a -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/foo.a
-# RUN: not %lld %t/test.o -ObjC %t/foo.a -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/foo.a
-# CHECK: error: [[FILE]]: archive member foo.dylib has unhandled file type
+# RUN: not %lld %t/test.o %t/foo.a -o /dev/null 2>&1 | FileCheck %s \
+# RUN:   --check-prefix=SYM -DFILE=%t/foo.a
+# RUN: not %lld %t/test.o -ObjC %t/foo.a -o /dev/null 2>&1 | FileCheck %s \
+# RUN:   --check-prefix=SYM -DFILE=%t/foo.a
+# RUN: not %lld %t/test.o -force_load %t/foo.a -o /dev/null 2>&1 | FileCheck %s \
+# RUN:   --check-prefix=FORCE-LOAD -DFILE=%t/foo.a
+# SYM: error: [[FILE]]: could not get the member defining symbol _foo: foo.dylib has unhandled file type
+# FORCE-LOAD: error: [[FILE]]: -force_load failed to load archive member: foo.dylib has unhandled file type
 
 #--- foo.s
 .globl _foo

diff  --git a/lld/test/MachO/thin-archive.s b/lld/test/MachO/thin-archive.s
index 4af3f7c17ce37..248209b3f336a 100644
--- a/lld/test/MachO/thin-archive.s
+++ b/lld/test/MachO/thin-archive.s
@@ -25,8 +25,8 @@
 
 # CHECK: __Z1fv
 # CHECK: _main
-# NOOBJ: error: {{.*}}lib_thin.a: could not get the buffer for the member defining symbol f(): '{{.*}}lib.o':
-# NOOBJNODEMANGLE: error: {{.*}}lib_thin.a: could not get the buffer for the member defining symbol __Z1fv: '{{.*}}lib.o':
+# NOOBJ: error: {{.*}}lib_thin.a: could not get the member defining symbol f(): '{{.*}}lib.o': {{[N|n]}}o such file or directory
+# NOOBJNODEMANGLE: error: {{.*}}lib_thin.a: could not get the member defining symbol __Z1fv: '{{.*}}lib.o': {{[N|n]}}o such file or directory
 
 #--- mangled-symbol.s
 .globl  __Z1fv


        


More information about the llvm-commits mailing list