[lld] 9065fe5 - [lld-macho] Refactor archive loading
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 26 15:57:36 PDT 2021
Author: Jez Ng
Date: 2021-08-26T18:52:07-04:00
New Revision: 9065fe55911923c158e0d0e739081996d4ff86f2
URL: https://github.com/llvm/llvm-project/commit/9065fe55911923c158e0d0e739081996d4ff86f2
DIFF: https://github.com/llvm/llvm-project/commit/9065fe55911923c158e0d0e739081996d4ff86f2.diff
LOG: [lld-macho] Refactor archive loading
The previous logic was duplicated between symbol-initiated
archive loads versus flag-initiated loads (i.e. `-force_load` and
`-ObjC`). This resulted in code duplication as well as redundant work --
we would create Archive instances twice whenever we had one of those
flags; once in `getArchiveMembers` and again when we constructed the
ArchiveFile.
This was motivated by an upcoming diff where we load archive members
containing ObjC-related symbols before loading those containing
ObjC-related sections, as well as before performing symbol resolution.
Without this refactor, it would be difficult to do that while avoiding
loading the same archive member twice.
Differential Revision: https://reviews.llvm.org/D108780
Added:
Modified:
lld/MachO/Driver.cpp
lld/MachO/Driver.h
lld/MachO/DriverUtils.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/ObjC.cpp
lld/test/MachO/invalid/bad-archive-member.s
lld/test/MachO/thin-archive.s
Removed:
################################################################################
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 3098e6dac0d6c..b5458dcc89c2f 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -224,49 +224,6 @@ static llvm::CachePruningPolicy getLTOCachePolicy(InputArgList &args) {
return CHECK(parseCachePruningPolicy(ltoPolicy), "invalid LTO cache policy");
}
-namespace {
-struct ArchiveMember {
- MemoryBufferRef mbref;
- uint32_t modTime;
- uint64_t offsetInArchive;
-};
-} // namespace
-
-// Returns slices of MB by parsing MB as an archive file.
-// Each slice consists of a member file in the archive.
-static std::vector<ArchiveMember> getArchiveMembers(MemoryBufferRef mb) {
- std::unique_ptr<Archive> file =
- CHECK(Archive::create(mb),
- mb.getBufferIdentifier() + ": failed to parse archive");
- Archive *archive = file.get();
- make<std::unique_ptr<Archive>>(std::move(file)); // take ownership
-
- std::vector<ArchiveMember> v;
- Error err = Error::success();
-
- // Thin archives refer to .o files, so --reproduce needs the .o files too.
- bool addToTar = archive->isThin() && tar;
-
- for (const Archive::Child &c : archive->children(err)) {
- MemoryBufferRef mbref =
- CHECK(c.getMemoryBufferRef(),
- mb.getBufferIdentifier() +
- ": could not get the buffer for a child of the archive");
- if (addToTar)
- tar->append(relativeToRoot(check(c.getFullName())), mbref.getBuffer());
- uint32_t modTime = toTimeT(
- CHECK(c.getLastModified(), mb.getBufferIdentifier() +
- ": could not get the modification "
- "time for a child of the archive"));
- v.push_back({mbref, modTime, c.getChildOffset()});
- }
- if (err)
- fatal(mb.getBufferIdentifier() +
- ": Archive::children failed: " + toString(std::move(err)));
-
- return v;
-}
-
static DenseMap<StringRef, ArchiveFile *> loadedArchives;
static InputFile *addFile(StringRef path, bool forceLoadArchive,
@@ -289,48 +246,52 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive,
if (ArchiveFile *cachedFile = loadedArchives[path])
return cachedFile;
- std::unique_ptr<object::Archive> file = CHECK(
+ std::unique_ptr<object::Archive> archive = CHECK(
object::Archive::create(mbref), path + ": failed to parse archive");
- if (!file->isEmpty() && !file->hasSymbolTable())
+ if (!archive->isEmpty() && !archive->hasSymbolTable())
error(path + ": archive has no index; run ranlib to add one");
+ auto *file = make<ArchiveFile>(std::move(archive));
if (config->allLoad || forceLoadArchive) {
if (Optional<MemoryBufferRef> buffer = readFile(path)) {
- for (const ArchiveMember &member : getArchiveMembers(*buffer)) {
- if (Optional<InputFile *> file = loadArchiveMember(
- member.mbref, member.modTime, path, /*objCOnly=*/false,
- member.offsetInArchive)) {
- inputFiles.insert(*file);
- printArchiveMemberLoad(
- (forceLoadArchive ? "-force_load" : "-all_load"),
- inputFiles.back());
- }
+ Error e = Error::success();
+ for (const object::Archive::Child &c : file->getArchive().children(e)) {
+ StringRef reason = forceLoadArchive ? "-force_load" : "-all_load";
+ if (Error e = file->fetch(c, reason))
+ error(toString(file) + ": " + reason +
+ " failed to load archive member: " + toString(std::move(e)));
}
+ if (e)
+ error(toString(file) +
+ ": Archive::children failed: " + toString(std::move(e)));
}
} else if (config->forceLoadObjC) {
- for (const object::Archive::Symbol &sym : file->symbols())
+ for (const object::Archive::Symbol &sym : file->getArchive().symbols())
if (sym.getName().startswith(objc::klass))
symtab->addUndefined(sym.getName(), /*file=*/nullptr,
/*isWeakRef=*/false);
// TODO: no need to look for ObjC sections for a given archive member if
- // we already found that it contains an ObjC symbol. We should also
- // consider creating a LazyObjFile class in order to avoid double-loading
- // these files here and below (as part of the ArchiveFile).
+ // we already found that it contains an ObjC symbol.
if (Optional<MemoryBufferRef> buffer = readFile(path)) {
- for (const ArchiveMember &member : getArchiveMembers(*buffer)) {
- if (Optional<InputFile *> file = loadArchiveMember(
- member.mbref, member.modTime, path, /*objCOnly=*/true,
- member.offsetInArchive)) {
- inputFiles.insert(*file);
- printArchiveMemberLoad("-ObjC", inputFiles.back());
- }
+ Error e = Error::success();
+ for (const object::Archive::Child &c : file->getArchive().children(e)) {
+ Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+ if (!mb || !hasObjCSection(*mb))
+ continue;
+ if (Error e = file->fetch(c, "-ObjC"))
+ error(toString(file) + ": -ObjC failed to load archive member: " +
+ toString(std::move(e)));
}
+ if (e)
+ error(toString(file) +
+ ": Archive::children failed: " + toString(std::move(e)));
}
}
- newFile = loadedArchives[path] = make<ArchiveFile>(std::move(file));
+ file->addLazySymbols();
+ newFile = loadedArchives[path] = file;
break;
}
case file_magic::macho_object:
diff --git a/lld/MachO/Driver.h b/lld/MachO/Driver.h
index 10f307e780c27..b2d5721290e7e 100644
--- a/lld/MachO/Driver.h
+++ b/lld/MachO/Driver.h
@@ -68,11 +68,6 @@ findPathCombination(const llvm::Twine &name,
// rerooted.
llvm::StringRef rerootPath(llvm::StringRef path);
-llvm::Optional<InputFile *> loadArchiveMember(MemoryBufferRef, uint32_t modTime,
- StringRef archiveName,
- bool objCOnly,
- uint64_t offsetInArchive);
-
uint32_t getModTime(llvm::StringRef path);
void printArchiveMemberLoad(StringRef reason, const InputFile *);
diff --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp
index fc25182d91405..0cdea438b44ad 100644
--- a/lld/MachO/DriverUtils.cpp
+++ b/lld/MachO/DriverUtils.cpp
@@ -18,7 +18,6 @@
#include "lld/Common/Reproduce.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
@@ -277,30 +276,6 @@ StringRef macho::rerootPath(StringRef path) {
return path;
}
-Optional<InputFile *> macho::loadArchiveMember(MemoryBufferRef mb,
- uint32_t modTime,
- StringRef archiveName,
- bool objCOnly,
- uint64_t offsetInArchive) {
- if (config->zeroModTime)
- modTime = 0;
-
- switch (identify_magic(mb.getBuffer())) {
- case file_magic::macho_object:
- if (!objCOnly || hasObjCSection(mb))
- return make<ObjFile>(mb, modTime, archiveName);
- return None;
- case file_magic::bitcode:
- if (!objCOnly || check(isBitcodeContainingObjCCategory(mb)))
- return make<BitcodeFile>(mb, archiveName, offsetInArchive);
- return None;
- default:
- error(archiveName + ": archive member " + mb.getBufferIdentifier() +
- " has unhandled file type");
- return None;
- }
-}
-
uint32_t macho::getModTime(StringRef path) {
if (config->zeroModTime)
return 0;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 8d64f2731d8d8..aaa914e21c365 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1233,47 +1233,75 @@ void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
}
ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f)
- : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
+ : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {}
+
+void ArchiveFile::addLazySymbols() {
for (const object::Archive::Symbol &sym : file->symbols())
symtab->addLazy(sym.getName(), this, sym);
}
-void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
- object::Archive::Child c =
- CHECK(sym.getMember(), toString(this) +
- ": could not get the member for symbol " +
- toMachOString(sym));
+static Expected<InputFile *> loadArchiveMember(MemoryBufferRef mb,
+ uint32_t modTime,
+ StringRef archiveName,
+ uint64_t offsetInArchive) {
+ if (config->zeroModTime)
+ modTime = 0;
+
+ switch (identify_magic(mb.getBuffer())) {
+ case file_magic::macho_object:
+ return make<ObjFile>(mb, modTime, archiveName);
+ case file_magic::bitcode:
+ return make<BitcodeFile>(mb, archiveName, offsetInArchive);
+ default:
+ return createStringError(inconvertibleErrorCode(),
+ mb.getBufferIdentifier() +
+ " has unhandled file type");
+ }
+}
+Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
if (!seen.insert(c.getChildOffset()).second)
- return;
+ return Error::success();
- MemoryBufferRef mb =
- CHECK(c.getMemoryBufferRef(),
- toString(this) +
- ": could not get the buffer for the member defining symbol " +
- toMachOString(sym));
+ Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+ if (!mb)
+ return mb.takeError();
+ // Thin archives refer to .o files, so --reproduce needs the .o files too.
if (tar && c.getParent()->isThin())
- tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer());
+ tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb->getBuffer());
+
+ Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
+ if (!modTime)
+ return modTime.takeError();
- uint32_t modTime = toTimeT(
- CHECK(c.getLastModified(), toString(this) +
- ": could not get the modification time "
- "for the member defining symbol " +
- toMachOString(sym)));
+ Expected<InputFile *> file =
+ loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset());
+
+ if (!file)
+ return file.takeError();
+
+ inputFiles.insert(*file);
+ printArchiveMemberLoad(reason, *file);
+ return Error::success();
+}
+
+void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
+ object::Archive::Child c =
+ CHECK(sym.getMember(), toString(this) +
+ ": could not get the member defining symbol " +
+ toMachOString(sym));
// `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
// and become invalid after that call. Copy it to the stack so we can refer
// to it later.
const object::Archive::Symbol symCopy = sym;
- if (Optional<InputFile *> file = loadArchiveMember(
- mb, modTime, getName(), /*objCOnly=*/false, c.getChildOffset())) {
- inputFiles.insert(*file);
- // ld64 doesn't demangle sym here even with -demangle.
- // Match that: intentionally don't call toMachOString().
- printArchiveMemberLoad(symCopy.getName(), *file);
- }
+ // ld64 doesn't demangle sym here even with -demangle.
+ // Match that: intentionally don't call toMachOString().
+ if (Error e = fetch(c, symCopy.getName()))
+ error(toString(this) + ": could not get the member defining symbol " +
+ toMachOString(symCopy) + ": " + toString(std::move(e)));
}
static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 0101fb71c8a38..c6b3a3b5022b4 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -184,8 +184,13 @@ class DylibFile final : public InputFile {
class ArchiveFile final : public InputFile {
public:
explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
+ void addLazySymbols();
+ void fetch(const llvm::object::Archive::Symbol &);
+ // LLD normally doesn't use Error for error-handling, but the underlying
+ // Archive library does, so this is the cleanest way to wrap it.
+ Error fetch(const llvm::object::Archive::Child &, StringRef reason);
+ const llvm::object::Archive &getArchive() const { return *file; };
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
- void fetch(const llvm::object::Archive::Symbol &sym);
private:
std::unique_ptr<llvm::object::Archive> file;
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 7ed800827f3e1..ab9aaecb621c1 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -13,13 +13,14 @@
#include "Target.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Bitcode/BitcodeReader.h"
using namespace llvm;
using namespace llvm::MachO;
using namespace lld;
using namespace lld::macho;
-template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
+template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
using Section = typename LP::section;
auto *hdr =
@@ -46,9 +47,20 @@ template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
return false;
}
-bool macho::hasObjCSection(MemoryBufferRef mb) {
+static bool objectHasObjCSection(MemoryBufferRef mb) {
if (target->wordSize == 8)
- return ::hasObjCSection<LP64>(mb);
+ return ::objectHasObjCSection<LP64>(mb);
else
- return ::hasObjCSection<ILP32>(mb);
+ return ::objectHasObjCSection<ILP32>(mb);
+}
+
+bool macho::hasObjCSection(MemoryBufferRef mb) {
+ switch (identify_magic(mb.getBuffer())) {
+ case file_magic::macho_object:
+ return objectHasObjCSection(mb);
+ case file_magic::bitcode:
+ return check(isBitcodeContainingObjCCategory(mb));
+ default:
+ return false;
+ }
}
diff --git a/lld/test/MachO/invalid/bad-archive-member.s b/lld/test/MachO/invalid/bad-archive-member.s
index d8bb4092153c8..418129d5dfefe 100644
--- a/lld/test/MachO/invalid/bad-archive-member.s
+++ b/lld/test/MachO/invalid/bad-archive-member.s
@@ -4,10 +4,14 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: %lld -dylib -lSystem %t/foo.o -o %t/foo.dylib
# RUN: llvm-ar rcs %t/foo.a %t/foo.dylib
-# RUN: not %lld %t/test.o %t/foo.a -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/foo.a
-# RUN: not %lld %t/test.o -force_load %t/foo.a -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/foo.a
-# RUN: not %lld %t/test.o -ObjC %t/foo.a -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/foo.a
-# CHECK: error: [[FILE]]: archive member foo.dylib has unhandled file type
+# RUN: not %lld %t/test.o %t/foo.a -o /dev/null 2>&1 | FileCheck %s \
+# RUN: --check-prefix=SYM -DFILE=%t/foo.a
+# RUN: not %lld %t/test.o -ObjC %t/foo.a -o /dev/null 2>&1 | FileCheck %s \
+# RUN: --check-prefix=SYM -DFILE=%t/foo.a
+# RUN: not %lld %t/test.o -force_load %t/foo.a -o /dev/null 2>&1 | FileCheck %s \
+# RUN: --check-prefix=FORCE-LOAD -DFILE=%t/foo.a
+# SYM: error: [[FILE]]: could not get the member defining symbol _foo: foo.dylib has unhandled file type
+# FORCE-LOAD: error: [[FILE]]: -force_load failed to load archive member: foo.dylib has unhandled file type
#--- foo.s
.globl _foo
diff --git a/lld/test/MachO/thin-archive.s b/lld/test/MachO/thin-archive.s
index 4af3f7c17ce37..248209b3f336a 100644
--- a/lld/test/MachO/thin-archive.s
+++ b/lld/test/MachO/thin-archive.s
@@ -25,8 +25,8 @@
# CHECK: __Z1fv
# CHECK: _main
-# NOOBJ: error: {{.*}}lib_thin.a: could not get the buffer for the member defining symbol f(): '{{.*}}lib.o':
-# NOOBJNODEMANGLE: error: {{.*}}lib_thin.a: could not get the buffer for the member defining symbol __Z1fv: '{{.*}}lib.o':
+# NOOBJ: error: {{.*}}lib_thin.a: could not get the member defining symbol f(): '{{.*}}lib.o': {{[N|n]}}o such file or directory
+# NOOBJNODEMANGLE: error: {{.*}}lib_thin.a: could not get the member defining symbol __Z1fv: '{{.*}}lib.o': {{[N|n]}}o such file or directory
#--- mangled-symbol.s
.globl __Z1fv
More information about the llvm-commits
mailing list