[lld] [lld-macho] Support archives without index (PR #132942)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 25 07:49:51 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-macho

Author: Leonard Grey (speednoisemovement)

<details>
<summary>Changes</summary>

This is a ~port of https://reviews.llvm.org/D117284. Like in that change, archives without indices are treated as a collection of lazy object files (as in `--start-lib/--end-lib`)

Porting the ELF follow-up to convert *all* archives to the lazy object code path (https://reviews.llvm.org/D119074) is a natural next step, but we would need to ensure the assertions about memory use hold for Mach-O.

NB: without an index, we can't do the part of the `-ObjC` scan where we check for Objective-C symbols directly. We *can* still check for `__obcj` sections so I wonder how much of a problem this actually is, since I'm not sure how the "symbols but no sections" case can appear in the wild.

---
Full diff: https://github.com/llvm/llvm-project/pull/132942.diff


5 Files Affected:

- (modified) lld/MachO/Driver.cpp (+5-6) 
- (modified) lld/MachO/InputFiles.cpp (+42-17) 
- (modified) lld/MachO/InputFiles.h (+3) 
- (added) lld/test/MachO/archive-no-index.s (+43) 
- (removed) lld/test/MachO/invalid/archive-no-index.s (-32) 


``````````diff
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index ee26fafb50b73..dbb6b9a50a82f 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -314,8 +314,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
       std::unique_ptr<object::Archive> archive = CHECK(
           object::Archive::create(mbref), path + ": failed to parse archive");
 
-      if (!archive->isEmpty() && !archive->hasSymbolTable())
-        error(path + ": archive has no index; run ranlib to add one");
       file = make<ArchiveFile>(std::move(archive), isForceHidden);
 
       if (tar && file->getArchive().isThin())
@@ -362,9 +360,11 @@ static InputFile *addFile(StringRef path, LoadType loadType,
                 ": Archive::children failed: " + toString(std::move(e)));
       }
     } else if (isCommandLineLoad && config->forceLoadObjC) {
-      for (const object::Archive::Symbol &sym : file->getArchive().symbols())
-        if (sym.getName().starts_with(objc::symbol_names::klass))
-          file->fetch(sym);
+      if (file->getArchive().getNumberOfSymbols() > 0) {
+        for (const object::Archive::Symbol &sym : file->getArchive().symbols())
+          if (sym.getName().starts_with(objc::symbol_names::klass))
+            file->fetch(sym);
+      }
 
       // TODO: no need to look for ObjC sections for a given archive member if
       // we already found that it contains an ObjC symbol.
@@ -394,7 +394,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
                 ": Archive::children failed: " + toString(std::move(e)));
       }
     }
-
     file->addLazySymbols();
     loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad};
     newFile = file;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 9adfbc9d3f6f5..31eba6b47140d 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2156,9 +2156,30 @@ ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
 void ArchiveFile::addLazySymbols() {
   // Avoid calling getMemoryBufferRef() on zero-symbol archive
   // since that crashes.
-  if (file->isEmpty() || file->getNumberOfSymbols() == 0)
+  if (file->isEmpty())
     return;
 
+  if (file->getNumberOfSymbols() == 0) {
+    // No index, treat each child as a lazy object file.
+    Error e = Error::success();
+    for (const object::Archive::Child &c : file->children(e)) {
+      // Check `seen` but don't insert so a future eager load can still happen.
+      if (seen.contains(c.getChildOffset()))
+        continue;
+      if (!seenLazy.insert(c.getChildOffset()).second)
+        continue;
+      auto file = childToObjectFile(c, /*lazy=*/true);
+      if (!file)
+        error(toString(this) +
+              ": couldn't process child: " + toString(file.takeError()));
+      inputFiles.insert(*file);
+    }
+    if (e)
+      error(toString(this) +
+            ": Archive::children failed: " + toString(std::move(e)));
+    return;
+  }
+
   Error err = Error::success();
   auto child = file->child_begin(err);
   // Ignore the I/O error here - will be reported later.
@@ -2188,16 +2209,17 @@ void ArchiveFile::addLazySymbols() {
 
 static Expected<InputFile *>
 loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
-                  uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
+                  uint64_t offsetInArchive, bool forceHidden, bool compatArch,
+                  bool lazy) {
   if (config->zeroModTime)
     modTime = 0;
 
   switch (identify_magic(mb.getBuffer())) {
   case file_magic::macho_object:
-    return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden,
+    return make<ObjFile>(mb, modTime, archiveName, lazy, forceHidden,
                          compatArch);
   case file_magic::bitcode:
-    return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
+    return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy,
                              forceHidden, compatArch);
   default:
     return createStringError(inconvertibleErrorCode(),
@@ -2209,19 +2231,7 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
 Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
   if (!seen.insert(c.getChildOffset()).second)
     return Error::success();
-
-  Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
-  if (!mb)
-    return mb.takeError();
-
-  Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
-  if (!modTime)
-    return modTime.takeError();
-
-  Expected<InputFile *> file =
-      loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
-                        forceHidden, compatArch);
-
+  auto file = childToObjectFile(c, /*lazy=*/false);
   if (!file)
     return file.takeError();
 
@@ -2248,6 +2258,21 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
           toMachOString(symCopy) + ": " + toString(std::move(e)));
 }
 
+Expected<InputFile *>
+ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
+                               bool lazy) {
+  Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+  if (!mb)
+    return mb.takeError();
+
+  Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
+  if (!modTime)
+    return modTime.takeError();
+
+  return loadArchiveMember(*mb, toTimeT(*modTime), getName(),
+                           c.getChildOffset(), forceHidden, compatArch, lazy);
+}
+
 static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
                                           BitcodeFile &file) {
   StringRef name = saver().save(objSym.getName());
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index bc8c8038a39d1..2d5bceb160445 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -297,10 +297,13 @@ class ArchiveFile final : public InputFile {
   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
 
 private:
+  Expected<InputFile *> childToObjectFile(const llvm::object::Archive::Child &c,
+                                          bool lazy);
   std::unique_ptr<llvm::object::Archive> file;
   // Keep track of children fetched from the archive by tracking
   // which address offsets have been fetched already.
   llvm::DenseSet<uint64_t> seen;
+  llvm::DenseSet<uint64_t> seenLazy;
   // Load all symbols with hidden visibility (-load_hidden).
   bool forceHidden;
 };
diff --git a/lld/test/MachO/archive-no-index.s b/lld/test/MachO/archive-no-index.s
new file mode 100644
index 0000000000000..37d28fda5c920
--- /dev/null
+++ b/lld/test/MachO/archive-no-index.s
@@ -0,0 +1,43 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib.o %t/lib.s
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s
+# RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o
+# RUN: %lld %t/main.o %t/lib.a -o %t/out
+# RUN: llvm-nm %t/out | FileCheck %s
+
+# CHECK-NOT: T _bar
+# CHECK:     T _foo
+
+## Test that every kind of eager load mechanism still works.
+# RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load
+# RUN: llvm-nm %t/all_load | FileCheck %s --check-prefix FORCED-LOAD
+# RUN: %lld %t/main.o -force_load %t/lib.a -o %t/force_load
+# RUN: llvm-nm %t/force_load | FileCheck %s --check-prefix FORCED-LOAD
+# RUN: %lld %t/main.o %t/lib.a -ObjC -o %t/objc
+# RUN: llvm-nm %t/objc | FileCheck %s --check-prefix FORCED-LOAD
+
+# FORCED-LOAD: T _bar
+
+#--- lib.s
+.global _foo
+_foo:
+    ret
+
+#--- lib2.s
+.section __DATA,__objc_catlist
+.quad 0x1234
+
+.section __TEXT,__text
+.global _bar
+_bar:
+    ret
+
+#--- main.s
+.global _main
+_main:
+    call _foo    
+    mov $0, %rax
+    ret
diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s
deleted file mode 100644
index 9cda945652500..0000000000000
--- a/lld/test/MachO/invalid/archive-no-index.s
+++ /dev/null
@@ -1,32 +0,0 @@
-# REQUIRES: x86
-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
-
-# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
-
-# RUN: not %lld %t/test.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
-# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
-
-#--- 2.s
-.globl _boo
-_boo:
-  ret
-
-#--- 3.s
-.globl _bar
-_bar:
-  ret
-
-#--- 4.s
-.globl _undefined, _unused
-_unused:
-  ret
-
-#--- main.s
-.global _main
-_main:
-  mov $0, %rax
-  ret

``````````

</details>


https://github.com/llvm/llvm-project/pull/132942


More information about the llvm-commits mailing list