[lld] [lld-macho] Support archives without index (PR #132942)

Leonard Grey via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 25 07:49:15 PDT 2025


https://github.com/speednoisemovement created https://github.com/llvm/llvm-project/pull/132942

This is a ~port of https://reviews.llvm.org/D117284. Like in that change, archives without indices are treated as a collection of lazy object files (as in `--start-lib/--end-lib`)

Porting the ELF follow-up to convert *all* archives to the lazy object code path (https://reviews.llvm.org/D119074) is a natural next step, but we would need to ensure the assertions about memory use hold for Mach-O.

NB: without an index, we can't do the part of the `-ObjC` scan where we check for Objective-C symbols directly. We *can* still check for `__obcj` sections so I wonder how much of a problem this actually is, since I'm not sure how the "symbols but no sections" case can appear in the wild.

>From 6dbb0df5c6595e731fba7a15b81abfef5e2b49b2 Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Thu, 20 Mar 2025 14:01:22 -0400
Subject: [PATCH 1/2] [lld-macho] Support archives with no index

---
 lld/MachO/Driver.cpp                      | 11 ++--
 lld/MachO/InputFiles.cpp                  | 68 +++++++++++++++++------
 lld/MachO/InputFiles.h                    |  6 +-
 lld/test/MachO/archive-no-index.s         | 39 +++++++++++++
 lld/test/MachO/invalid/archive-no-index.s | 32 -----------
 5 files changed, 99 insertions(+), 57 deletions(-)
 create mode 100644 lld/test/MachO/archive-no-index.s
 delete mode 100644 lld/test/MachO/invalid/archive-no-index.s

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index ee26fafb50b73..dbb6b9a50a82f 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -314,8 +314,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
       std::unique_ptr<object::Archive> archive = CHECK(
           object::Archive::create(mbref), path + ": failed to parse archive");
 
-      if (!archive->isEmpty() && !archive->hasSymbolTable())
-        error(path + ": archive has no index; run ranlib to add one");
       file = make<ArchiveFile>(std::move(archive), isForceHidden);
 
       if (tar && file->getArchive().isThin())
@@ -362,9 +360,11 @@ static InputFile *addFile(StringRef path, LoadType loadType,
                 ": Archive::children failed: " + toString(std::move(e)));
       }
     } else if (isCommandLineLoad && config->forceLoadObjC) {
-      for (const object::Archive::Symbol &sym : file->getArchive().symbols())
-        if (sym.getName().starts_with(objc::symbol_names::klass))
-          file->fetch(sym);
+      if (file->getArchive().getNumberOfSymbols() > 0) {
+        for (const object::Archive::Symbol &sym : file->getArchive().symbols())
+          if (sym.getName().starts_with(objc::symbol_names::klass))
+            file->fetch(sym);
+      }
 
       // TODO: no need to look for ObjC sections for a given archive member if
       // we already found that it contains an ObjC symbol.
@@ -394,7 +394,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
                 ": Archive::children failed: " + toString(std::move(e)));
       }
     }
-
     file->addLazySymbols();
     loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad};
     newFile = file;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 9adfbc9d3f6f5..96bb3a456261b 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2156,9 +2156,34 @@ ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
 void ArchiveFile::addLazySymbols() {
   // Avoid calling getMemoryBufferRef() on zero-symbol archive
   // since that crashes.
-  if (file->isEmpty() || file->getNumberOfSymbols() == 0)
+  if (file->isEmpty())
     return;
 
+  if (file->getNumberOfSymbols() == 0) {
+    // No index, treat each child as a lazy object file.
+    Error e = Error::success();
+    for (const object::Archive::Child &c : file->children(e)) {
+      // Check `seen` but don't insert so a future eager load can still happen.
+      if (seen.contains(c.getChildOffset()))
+        continue;
+      if (!seenLazy.insert(c.getChildOffset()).second) {
+        continue;
+      }
+      // First check seen.
+      // Then, write to and check seenLazy
+      // Then, get the file, check for error, and add it to inputs.
+      auto file = childToObjectFile(c, /*lazy=*/true);
+      if (!file)
+        error(toString(this) +
+              ": couldn't process child: " + toString(file.takeError()));
+      inputFiles.insert(*file);
+    }
+    if (e)
+      error(toString(this) +
+            ": Archive::children failed: " + toString(std::move(e)));
+    return;
+  }
+
   Error err = Error::success();
   auto child = file->child_begin(err);
   // Ignore the I/O error here - will be reported later.
@@ -2188,16 +2213,17 @@ void ArchiveFile::addLazySymbols() {
 
 static Expected<InputFile *>
 loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
-                  uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
+                  uint64_t offsetInArchive, bool forceHidden, bool compatArch,
+                  bool lazy) {
   if (config->zeroModTime)
     modTime = 0;
 
   switch (identify_magic(mb.getBuffer())) {
   case file_magic::macho_object:
-    return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden,
+    return make<ObjFile>(mb, modTime, archiveName, lazy, forceHidden,
                          compatArch);
   case file_magic::bitcode:
-    return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
+    return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy,
                              forceHidden, compatArch);
   default:
     return createStringError(inconvertibleErrorCode(),
@@ -2206,22 +2232,11 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
   }
 }
 
-Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
+Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason,
+                         bool lazy) {
   if (!seen.insert(c.getChildOffset()).second)
     return Error::success();
-
-  Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
-  if (!mb)
-    return mb.takeError();
-
-  Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
-  if (!modTime)
-    return modTime.takeError();
-
-  Expected<InputFile *> file =
-      loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
-                        forceHidden, compatArch);
-
+  auto file = childToObjectFile(c, /*lazy=*/false);
   if (!file)
     return file.takeError();
 
@@ -2248,6 +2263,23 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
           toMachOString(symCopy) + ": " + toString(std::move(e)));
 }
 
+Expected<InputFile *>
+ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
+                               bool lazy) {
+  Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+  if (!mb)
+    return mb.takeError();
+
+  Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
+  if (!modTime)
+    return modTime.takeError();
+
+  Expected<InputFile *> file =
+      loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
+                        forceHidden, compatArch, lazy);
+  return file;
+}
+
 static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
                                           BitcodeFile &file) {
   StringRef name = saver().save(objSym.getName());
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index bc8c8038a39d1..7b2b4769e0b2d 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -292,15 +292,19 @@ class ArchiveFile final : public InputFile {
   void fetch(const llvm::object::Archive::Symbol &);
   // LLD normally doesn't use Error for error-handling, but the underlying
   // Archive library does, so this is the cleanest way to wrap it.
-  Error fetch(const llvm::object::Archive::Child &, StringRef reason);
+  Error fetch(const llvm::object::Archive::Child &, StringRef reason,
+              bool lazy = false);
   const llvm::object::Archive &getArchive() const { return *file; };
   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
 
 private:
+  Expected<InputFile *> childToObjectFile(const llvm::object::Archive::Child &c,
+                                          bool lazy);
   std::unique_ptr<llvm::object::Archive> file;
   // Keep track of children fetched from the archive by tracking
   // which address offsets have been fetched already.
   llvm::DenseSet<uint64_t> seen;
+  llvm::DenseSet<uint64_t> seenLazy;
   // Load all symbols with hidden visibility (-load_hidden).
   bool forceHidden;
 };
diff --git a/lld/test/MachO/archive-no-index.s b/lld/test/MachO/archive-no-index.s
new file mode 100644
index 0000000000000..4135df0c69f4a
--- /dev/null
+++ b/lld/test/MachO/archive-no-index.s
@@ -0,0 +1,39 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib.o %t/lib.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s
+# RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o
+# RUN: %lld %t/main.o %t/lib.a -o %t/out
+
+## Test that every kind of eager load mechanism still works.
+# RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load
+# RUN: llvm-nm %t/all_load | FileCheck %s --check-prefix FORCED-LOAD
+# RUN: %lld %t/main.o -force_load %t/lib.a -o %t/force_load
+# RUN: llvm-nm %t/force_load | FileCheck %s --check-prefix FORCED-LOAD
+# RUN: %lld %t/main.o %t/lib.a -ObjC -o %t/objc
+# RUN: llvm-nm %t/objc | FileCheck %s --check-prefix FORCED-LOAD
+
+# FORCED-LOAD: T _bar
+
+#--- lib.s
+.global _foo
+_foo:
+    ret
+
+#--- lib2.s
+.section __DATA,__objc_catlist
+.quad 0x1234
+
+.section __TEXT,__text
+.global _bar
+_bar:
+    ret
+
+#--- main.s
+.global _main
+_main:
+    call _foo    
+    mov $0, %rax
+    ret
\ No newline at end of file
diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s
deleted file mode 100644
index 9cda945652500..0000000000000
--- a/lld/test/MachO/invalid/archive-no-index.s
+++ /dev/null
@@ -1,32 +0,0 @@
-# REQUIRES: x86
-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
-
-# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
-
-# RUN: not %lld %t/test.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
-# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
-
-#--- 2.s
-.globl _boo
-_boo:
-  ret
-
-#--- 3.s
-.globl _bar
-_bar:
-  ret
-
-#--- 4.s
-.globl _undefined, _unused
-_unused:
-  ret
-
-#--- main.s
-.global _main
-_main:
-  mov $0, %rax
-  ret

>From 59f216ae9376f8b9bd81f7cef0703100c36db81e Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey at chromium.org>
Date: Tue, 25 Mar 2025 10:33:12 -0400
Subject: [PATCH 2/2] Cleanup

---
 lld/MachO/InputFiles.cpp          | 15 ++++-----------
 lld/MachO/InputFiles.h            |  3 +--
 lld/test/MachO/archive-no-index.s |  6 +++++-
 3 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 96bb3a456261b..31eba6b47140d 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2166,12 +2166,8 @@ void ArchiveFile::addLazySymbols() {
       // Check `seen` but don't insert so a future eager load can still happen.
       if (seen.contains(c.getChildOffset()))
         continue;
-      if (!seenLazy.insert(c.getChildOffset()).second) {
+      if (!seenLazy.insert(c.getChildOffset()).second)
         continue;
-      }
-      // First check seen.
-      // Then, write to and check seenLazy
-      // Then, get the file, check for error, and add it to inputs.
       auto file = childToObjectFile(c, /*lazy=*/true);
       if (!file)
         error(toString(this) +
@@ -2232,8 +2228,7 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
   }
 }
 
-Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason,
-                         bool lazy) {
+Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
   if (!seen.insert(c.getChildOffset()).second)
     return Error::success();
   auto file = childToObjectFile(c, /*lazy=*/false);
@@ -2274,10 +2269,8 @@ ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
   if (!modTime)
     return modTime.takeError();
 
-  Expected<InputFile *> file =
-      loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
-                        forceHidden, compatArch, lazy);
-  return file;
+  return loadArchiveMember(*mb, toTimeT(*modTime), getName(),
+                           c.getChildOffset(), forceHidden, compatArch, lazy);
 }
 
 static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 7b2b4769e0b2d..2d5bceb160445 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -292,8 +292,7 @@ class ArchiveFile final : public InputFile {
   void fetch(const llvm::object::Archive::Symbol &);
   // LLD normally doesn't use Error for error-handling, but the underlying
   // Archive library does, so this is the cleanest way to wrap it.
-  Error fetch(const llvm::object::Archive::Child &, StringRef reason,
-              bool lazy = false);
+  Error fetch(const llvm::object::Archive::Child &, StringRef reason);
   const llvm::object::Archive &getArchive() const { return *file; };
   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
 
diff --git a/lld/test/MachO/archive-no-index.s b/lld/test/MachO/archive-no-index.s
index 4135df0c69f4a..37d28fda5c920 100644
--- a/lld/test/MachO/archive-no-index.s
+++ b/lld/test/MachO/archive-no-index.s
@@ -6,6 +6,10 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s
 # RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o
 # RUN: %lld %t/main.o %t/lib.a -o %t/out
+# RUN: llvm-nm %t/out | FileCheck %s
+
+# CHECK-NOT: T _bar
+# CHECK:     T _foo
 
 ## Test that every kind of eager load mechanism still works.
 # RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load
@@ -36,4 +40,4 @@ _bar:
 _main:
     call _foo    
     mov $0, %rax
-    ret
\ No newline at end of file
+    ret



More information about the llvm-commits mailing list