[lld] [lld-macho] Support archives without index (PR #132942)
Leonard Grey via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 07:49:15 PDT 2025
https://github.com/speednoisemovement created https://github.com/llvm/llvm-project/pull/132942
This is a ~port of https://reviews.llvm.org/D117284. Like in that change, archives without indices are treated as a collection of lazy object files (as in `--start-lib/--end-lib`)
Porting the ELF follow-up to convert *all* archives to the lazy object code path (https://reviews.llvm.org/D119074) is a natural next step, but we would need to ensure the assertions about memory use hold for Mach-O.
NB: without an index, we can't do the part of the `-ObjC` scan where we check for Objective-C symbols directly. We *can* still check for `__obcj` sections so I wonder how much of a problem this actually is, since I'm not sure how the "symbols but no sections" case can appear in the wild.
>From 6dbb0df5c6595e731fba7a15b81abfef5e2b49b2 Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Thu, 20 Mar 2025 14:01:22 -0400
Subject: [PATCH 1/2] [lld-macho] Support archives with no index
---
lld/MachO/Driver.cpp | 11 ++--
lld/MachO/InputFiles.cpp | 68 +++++++++++++++++------
lld/MachO/InputFiles.h | 6 +-
lld/test/MachO/archive-no-index.s | 39 +++++++++++++
lld/test/MachO/invalid/archive-no-index.s | 32 -----------
5 files changed, 99 insertions(+), 57 deletions(-)
create mode 100644 lld/test/MachO/archive-no-index.s
delete mode 100644 lld/test/MachO/invalid/archive-no-index.s
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index ee26fafb50b73..dbb6b9a50a82f 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -314,8 +314,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
std::unique_ptr<object::Archive> archive = CHECK(
object::Archive::create(mbref), path + ": failed to parse archive");
- if (!archive->isEmpty() && !archive->hasSymbolTable())
- error(path + ": archive has no index; run ranlib to add one");
file = make<ArchiveFile>(std::move(archive), isForceHidden);
if (tar && file->getArchive().isThin())
@@ -362,9 +360,11 @@ static InputFile *addFile(StringRef path, LoadType loadType,
": Archive::children failed: " + toString(std::move(e)));
}
} else if (isCommandLineLoad && config->forceLoadObjC) {
- for (const object::Archive::Symbol &sym : file->getArchive().symbols())
- if (sym.getName().starts_with(objc::symbol_names::klass))
- file->fetch(sym);
+ if (file->getArchive().getNumberOfSymbols() > 0) {
+ for (const object::Archive::Symbol &sym : file->getArchive().symbols())
+ if (sym.getName().starts_with(objc::symbol_names::klass))
+ file->fetch(sym);
+ }
// TODO: no need to look for ObjC sections for a given archive member if
// we already found that it contains an ObjC symbol.
@@ -394,7 +394,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
": Archive::children failed: " + toString(std::move(e)));
}
}
-
file->addLazySymbols();
loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad};
newFile = file;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 9adfbc9d3f6f5..96bb3a456261b 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2156,9 +2156,34 @@ ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
void ArchiveFile::addLazySymbols() {
// Avoid calling getMemoryBufferRef() on zero-symbol archive
// since that crashes.
- if (file->isEmpty() || file->getNumberOfSymbols() == 0)
+ if (file->isEmpty())
return;
+ if (file->getNumberOfSymbols() == 0) {
+ // No index, treat each child as a lazy object file.
+ Error e = Error::success();
+ for (const object::Archive::Child &c : file->children(e)) {
+ // Check `seen` but don't insert so a future eager load can still happen.
+ if (seen.contains(c.getChildOffset()))
+ continue;
+ if (!seenLazy.insert(c.getChildOffset()).second) {
+ continue;
+ }
+ // First check seen.
+ // Then, write to and check seenLazy
+ // Then, get the file, check for error, and add it to inputs.
+ auto file = childToObjectFile(c, /*lazy=*/true);
+ if (!file)
+ error(toString(this) +
+ ": couldn't process child: " + toString(file.takeError()));
+ inputFiles.insert(*file);
+ }
+ if (e)
+ error(toString(this) +
+ ": Archive::children failed: " + toString(std::move(e)));
+ return;
+ }
+
Error err = Error::success();
auto child = file->child_begin(err);
// Ignore the I/O error here - will be reported later.
@@ -2188,16 +2213,17 @@ void ArchiveFile::addLazySymbols() {
static Expected<InputFile *>
loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
- uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
+ uint64_t offsetInArchive, bool forceHidden, bool compatArch,
+ bool lazy) {
if (config->zeroModTime)
modTime = 0;
switch (identify_magic(mb.getBuffer())) {
case file_magic::macho_object:
- return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden,
+ return make<ObjFile>(mb, modTime, archiveName, lazy, forceHidden,
compatArch);
case file_magic::bitcode:
- return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
+ return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy,
forceHidden, compatArch);
default:
return createStringError(inconvertibleErrorCode(),
@@ -2206,22 +2232,11 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
}
}
-Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
+Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason,
+ bool lazy) {
if (!seen.insert(c.getChildOffset()).second)
return Error::success();
-
- Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
- if (!mb)
- return mb.takeError();
-
- Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
- if (!modTime)
- return modTime.takeError();
-
- Expected<InputFile *> file =
- loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
- forceHidden, compatArch);
-
+ auto file = childToObjectFile(c, /*lazy=*/false);
if (!file)
return file.takeError();
@@ -2248,6 +2263,23 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
toMachOString(symCopy) + ": " + toString(std::move(e)));
}
+Expected<InputFile *>
+ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
+ bool lazy) {
+ Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+ if (!mb)
+ return mb.takeError();
+
+ Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
+ if (!modTime)
+ return modTime.takeError();
+
+ Expected<InputFile *> file =
+ loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
+ forceHidden, compatArch, lazy);
+ return file;
+}
+
static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
BitcodeFile &file) {
StringRef name = saver().save(objSym.getName());
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index bc8c8038a39d1..7b2b4769e0b2d 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -292,15 +292,19 @@ class ArchiveFile final : public InputFile {
void fetch(const llvm::object::Archive::Symbol &);
// LLD normally doesn't use Error for error-handling, but the underlying
// Archive library does, so this is the cleanest way to wrap it.
- Error fetch(const llvm::object::Archive::Child &, StringRef reason);
+ Error fetch(const llvm::object::Archive::Child &, StringRef reason,
+ bool lazy = false);
const llvm::object::Archive &getArchive() const { return *file; };
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
private:
+ Expected<InputFile *> childToObjectFile(const llvm::object::Archive::Child &c,
+ bool lazy);
std::unique_ptr<llvm::object::Archive> file;
// Keep track of children fetched from the archive by tracking
// which address offsets have been fetched already.
llvm::DenseSet<uint64_t> seen;
+ llvm::DenseSet<uint64_t> seenLazy;
// Load all symbols with hidden visibility (-load_hidden).
bool forceHidden;
};
diff --git a/lld/test/MachO/archive-no-index.s b/lld/test/MachO/archive-no-index.s
new file mode 100644
index 0000000000000..4135df0c69f4a
--- /dev/null
+++ b/lld/test/MachO/archive-no-index.s
@@ -0,0 +1,39 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib.o %t/lib.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s
+# RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o
+# RUN: %lld %t/main.o %t/lib.a -o %t/out
+
+## Test that every kind of eager load mechanism still works.
+# RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load
+# RUN: llvm-nm %t/all_load | FileCheck %s --check-prefix FORCED-LOAD
+# RUN: %lld %t/main.o -force_load %t/lib.a -o %t/force_load
+# RUN: llvm-nm %t/force_load | FileCheck %s --check-prefix FORCED-LOAD
+# RUN: %lld %t/main.o %t/lib.a -ObjC -o %t/objc
+# RUN: llvm-nm %t/objc | FileCheck %s --check-prefix FORCED-LOAD
+
+# FORCED-LOAD: T _bar
+
+#--- lib.s
+.global _foo
+_foo:
+ ret
+
+#--- lib2.s
+.section __DATA,__objc_catlist
+.quad 0x1234
+
+.section __TEXT,__text
+.global _bar
+_bar:
+ ret
+
+#--- main.s
+.global _main
+_main:
+ call _foo
+ mov $0, %rax
+ ret
\ No newline at end of file
diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s
deleted file mode 100644
index 9cda945652500..0000000000000
--- a/lld/test/MachO/invalid/archive-no-index.s
+++ /dev/null
@@ -1,32 +0,0 @@
-# REQUIRES: x86
-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
-
-# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
-
-# RUN: not %lld %t/test.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
-# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
-
-#--- 2.s
-.globl _boo
-_boo:
- ret
-
-#--- 3.s
-.globl _bar
-_bar:
- ret
-
-#--- 4.s
-.globl _undefined, _unused
-_unused:
- ret
-
-#--- main.s
-.global _main
-_main:
- mov $0, %rax
- ret
>From 59f216ae9376f8b9bd81f7cef0703100c36db81e Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Tue, 25 Mar 2025 10:33:12 -0400
Subject: [PATCH 2/2] Cleanup
---
lld/MachO/InputFiles.cpp | 15 ++++-----------
lld/MachO/InputFiles.h | 3 +--
lld/test/MachO/archive-no-index.s | 6 +++++-
3 files changed, 10 insertions(+), 14 deletions(-)
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 96bb3a456261b..31eba6b47140d 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2166,12 +2166,8 @@ void ArchiveFile::addLazySymbols() {
// Check `seen` but don't insert so a future eager load can still happen.
if (seen.contains(c.getChildOffset()))
continue;
- if (!seenLazy.insert(c.getChildOffset()).second) {
+ if (!seenLazy.insert(c.getChildOffset()).second)
continue;
- }
- // First check seen.
- // Then, write to and check seenLazy
- // Then, get the file, check for error, and add it to inputs.
auto file = childToObjectFile(c, /*lazy=*/true);
if (!file)
error(toString(this) +
@@ -2232,8 +2228,7 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
}
}
-Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason,
- bool lazy) {
+Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
if (!seen.insert(c.getChildOffset()).second)
return Error::success();
auto file = childToObjectFile(c, /*lazy=*/false);
@@ -2274,10 +2269,8 @@ ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
if (!modTime)
return modTime.takeError();
- Expected<InputFile *> file =
- loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
- forceHidden, compatArch, lazy);
- return file;
+ return loadArchiveMember(*mb, toTimeT(*modTime), getName(),
+ c.getChildOffset(), forceHidden, compatArch, lazy);
}
static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 7b2b4769e0b2d..2d5bceb160445 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -292,8 +292,7 @@ class ArchiveFile final : public InputFile {
void fetch(const llvm::object::Archive::Symbol &);
// LLD normally doesn't use Error for error-handling, but the underlying
// Archive library does, so this is the cleanest way to wrap it.
- Error fetch(const llvm::object::Archive::Child &, StringRef reason,
- bool lazy = false);
+ Error fetch(const llvm::object::Archive::Child &, StringRef reason);
const llvm::object::Archive &getArchive() const { return *file; };
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
diff --git a/lld/test/MachO/archive-no-index.s b/lld/test/MachO/archive-no-index.s
index 4135df0c69f4a..37d28fda5c920 100644
--- a/lld/test/MachO/archive-no-index.s
+++ b/lld/test/MachO/archive-no-index.s
@@ -6,6 +6,10 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s
# RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o
# RUN: %lld %t/main.o %t/lib.a -o %t/out
+# RUN: llvm-nm %t/out | FileCheck %s
+
+# CHECK-NOT: T _bar
+# CHECK: T _foo
## Test that every kind of eager load mechanism still works.
# RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load
@@ -36,4 +40,4 @@ _bar:
_main:
call _foo
mov $0, %rax
- ret
\ No newline at end of file
+ ret
More information about the llvm-commits
mailing list