[lld] [lld-macho] Fix unchecked Error crash for thin archive missing child (PR #97169)

Daniel Bertalan via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 12 14:25:23 PDT 2024


https://github.com/BertalanD updated https://github.com/llvm/llvm-project/pull/97169

>From 059c08241e6af264242fe0c3fffe23ac19f34021 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Sat, 29 Jun 2024 18:47:06 +0200
Subject: [PATCH 1/6] [lld-macho] Fix unchecked Error crash for thin archive
 missing child

The repro file in #94716 contains a few thin archives without their
member object files present. I couldn't figure out why this happened
(issue in repro tarball generation?), but this caused an "unhandled
Error" crash on `LLVM_ENABLE_ABI_BREAKING_CHECKS` builds.
---
 lld/MachO/Driver.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 9dddabcf3680c..eb64e764100b2 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -349,7 +349,12 @@ static InputFile *addFile(StringRef path, LoadType loadType,
         Error e = Error::success();
         for (const object::Archive::Child &c : file->getArchive().children(e)) {
           Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
-          if (!mb || !hasObjCSection(*mb))
+          if (!mb) {
+            llvm::consumeError(mb.takeError());
+            continue;
+          }
+
+          if (!hasObjCSection(*mb))
             continue;
           if (Error e = file->fetch(c, "-ObjC"))
             error(toString(file) + ": -ObjC failed to load archive member: " +

>From dc53f8259a5075788b2643a137689919ba01459e Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Fri, 5 Jul 2024 19:03:40 +0200
Subject: [PATCH 2/6] Add test

---
 lld/test/MachO/reproduce-thin-archive-objc.s | 22 ++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 lld/test/MachO/reproduce-thin-archive-objc.s

diff --git a/lld/test/MachO/reproduce-thin-archive-objc.s b/lld/test/MachO/reproduce-thin-archive-objc.s
new file mode 100644
index 0000000000000..665411b012b3a
--- /dev/null
+++ b/lld/test/MachO/reproduce-thin-archive-objc.s
@@ -0,0 +1,22 @@
+# REQUIRES: x86
+
+## We only save those archive members from thin archives in the repro file that are actually used
+## during linking. This means that if we just iterate members, some of them might not exist.
+## Test that that we handle missing members correctly and don't assert on an unchecked Error.
+# FIXME: Should we eagerly save all members instead?
+
+# RUN: rm -rf %t; mkdir %t
+# RUN: sed s/SYM/_main/   %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o
+# RUN: sed s/SYM/_unused/ %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/unused.o
+
+# RUN: cd %t; llvm-ar rcsT unused.a unused.o
+## FIXME: Absolute paths don't end up relativized in the repro file.
+
+# RUN: %lld %t/main.o %t/unused.a -ObjC --reproduce=%t/repro.tar -o /dev/null
+# RUN: tar xf %t/repro.tar -C %t
+# RUN: cd %t/repro; %lld @response.txt
+
+.text
+.globl SYM
+SYM:
+    ret

>From 8e29cd648b91d4dff0decc7f90c2dbbb81327342 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Fri, 5 Jul 2024 19:09:19 +0200
Subject: [PATCH 3/6] Add explanation

---
 lld/MachO/Driver.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index eb64e764100b2..2e01d65017cc4 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -350,6 +350,8 @@ static InputFile *addFile(StringRef path, LoadType loadType,
         for (const object::Archive::Child &c : file->getArchive().children(e)) {
           Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
           if (!mb) {
+            // Thin archives from repro tarballs can contain missing members
+            // if the member was not loaded later during the initial link.
             llvm::consumeError(mb.takeError());
             continue;
           }

>From 2216582ce84700b0f2b821a02b4c0e4f41dc3b16 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Fri, 5 Jul 2024 21:25:53 +0200
Subject: [PATCH 4/6] Fix Windows CI

---
 lld/test/MachO/reproduce-thin-archive-objc.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/test/MachO/reproduce-thin-archive-objc.s b/lld/test/MachO/reproduce-thin-archive-objc.s
index 665411b012b3a..e966e5db3d448 100644
--- a/lld/test/MachO/reproduce-thin-archive-objc.s
+++ b/lld/test/MachO/reproduce-thin-archive-objc.s
@@ -13,7 +13,7 @@
 ## FIXME: Absolute paths don't end up relativized in the repro file.
 
 # RUN: %lld %t/main.o %t/unused.a -ObjC --reproduce=%t/repro.tar -o /dev/null
-# RUN: tar xf %t/repro.tar -C %t
+# RUN: cd %t; tar xf repro.tar
 # RUN: cd %t/repro; %lld @response.txt
 
 .text

>From 02b3fae51b3a06a52ad4ed4e99bcb6fa486697cd Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Fri, 12 Jul 2024 22:43:40 +0200
Subject: [PATCH 5/6] Add warning and mechanism to suppress it

---
 lld/MachO/Config.h   |  1 +
 lld/MachO/Driver.cpp | 13 ++++++++++---
 lld/MachO/Options.td |  3 +++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 4d3f3d05c2338..f20a3e48488c8 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -211,6 +211,7 @@ struct Configuration {
   bool csProfileGenerate = false;
   llvm::StringRef csProfilePath;
   bool pgoWarnMismatch;
+  bool warnThinArchiveMissingMembers;
 
   bool callGraphProfileSort = false;
   llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 1fd57737aa602..2b13350aef0ca 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -350,9 +350,13 @@ static InputFile *addFile(StringRef path, LoadType loadType,
         for (const object::Archive::Child &c : file->getArchive().children(e)) {
           Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
           if (!mb) {
-            // Thin archives from repro tarballs can contain missing members
-            // if the member was not loaded later during the initial link.
-            llvm::consumeError(mb.takeError());
+            // For a long time, only referenced object files were included in
+            // from thin archives in repro tarballs.
+            if (config->warnThinArchiveMissingMembers)
+              warn(toString(file) + ": -ObjC failed to open archive member: " +
+                   toString(mb.takeError()));
+            else
+              llvm::consumeError(mb.takeError());
             continue;
           }
 
@@ -1688,6 +1692,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
   config->csProfilePath = args.getLastArgValue(OPT_cs_profile_path);
   config->pgoWarnMismatch =
       args.hasFlag(OPT_pgo_warn_mismatch, OPT_no_pgo_warn_mismatch, true);
+  config->warnThinArchiveMissingMembers =
+      args.hasFlag(OPT_warn_thin_archive_missing_members,
+                   OPT_no_warn_thin_archive_missing_members, true);
   config->generateUuid = !args.hasArg(OPT_no_uuid);
 
   for (const Arg *arg : args.filtered(OPT_alias)) {
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index aecced9279da4..f529a7d877257 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -147,6 +147,9 @@ def cs_profile_path: Joined<["--"], "cs-profile-path=">,
 defm pgo_warn_mismatch: BB<"pgo-warn-mismatch",
   "turn on warnings about profile cfg mismatch (default)",
   "turn off warnings about profile cfg mismatch">, Group<grp_lld>;
+defm warn_thin_archive_missing_members : BB<"warn-thin-archive-missing-members",
+  "Warn on missing object files referenced by thin archives (default)",
+  "Do not warn on missing object files referenced by thin archives">, Group<grp_lld>;
 
 // This is a complete Options.td compiled from Apple's ld(1) manpage
 // dated 2018-03-07 and cross checked with ld64 source code in repo

>From 3ecb35c1a6ca01d97cbcae8245e62644dd709188 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Fri, 12 Jul 2024 23:24:26 +0200
Subject: [PATCH 6/6] Ensure that unreferenced members get included

---
 lld/MachO/Driver.cpp                         | 17 +++++++++++++++++
 lld/MachO/InputFiles.cpp                     |  4 ----
 lld/test/MachO/reproduce-thin-archive-objc.s | 19 +++++++++++--------
 lld/test/MachO/reproduce-thin-archives.s     | 19 +++++++++++++++----
 4 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 2b13350aef0ca..9784f2ae8fe65 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -270,6 +270,20 @@ struct ArchiveFileInfo {
 
 static DenseMap<StringRef, ArchiveFileInfo> loadedArchives;
 
+static void saveThinArchiveToRepro(ArchiveFile const *file) {
+  assert(tar && file->getArchive().isThin());
+
+  Error e = Error::success();
+  for (const object::Archive::Child &c : file->getArchive().children(e)) {
+    MemoryBufferRef mb = CHECK(c.getMemoryBufferRef(),
+                               toString(file) + ": failed to get buffer");
+    tar->append(relativeToRoot(CHECK(c.getFullName(), file)), mb.getBuffer());
+  }
+  if (e)
+    error(toString(file) +
+          ": Archive::children failed: " + toString(std::move(e)));
+}
+
 static InputFile *addFile(StringRef path, LoadType loadType,
                           bool isLazy = false, bool isExplicit = true,
                           bool isBundleLoader = false,
@@ -301,6 +315,9 @@ static InputFile *addFile(StringRef path, LoadType loadType,
       if (!archive->isEmpty() && !archive->hasSymbolTable())
         error(path + ": archive has no index; run ranlib to add one");
       file = make<ArchiveFile>(std::move(archive), isForceHidden);
+
+      if (tar && file->getArchive().isThin())
+        saveThinArchiveToRepro(file);
     } else {
       file = entry->second.file;
       // Command-line loads take precedence. If file is previously loaded via
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index b40a812f30bd3..3086c9cc4729d 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -2200,10 +2200,6 @@ Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
   if (!mb)
     return mb.takeError();
 
-  // Thin archives refer to .o files, so --reproduce needs the .o files too.
-  if (tar && c.getParent()->isThin())
-    tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb->getBuffer());
-
   Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
   if (!modTime)
     return modTime.takeError();
diff --git a/lld/test/MachO/reproduce-thin-archive-objc.s b/lld/test/MachO/reproduce-thin-archive-objc.s
index e966e5db3d448..c5fe42f130526 100644
--- a/lld/test/MachO/reproduce-thin-archive-objc.s
+++ b/lld/test/MachO/reproduce-thin-archive-objc.s
@@ -1,20 +1,23 @@
 # REQUIRES: x86
 
-## We only save those archive members from thin archives in the repro file that are actually used
-## during linking. This means that if we just iterate members, some of them might not exist.
-## Test that that we handle missing members correctly and don't assert on an unchecked Error.
-# FIXME: Should we eagerly save all members instead?
+## For a long time, LLD only included those members from thin archives that were actually used
+## during linking. However, we need to iterate over all members for -ObjC, check that we don't
+## crash when we encounter a missing member.
 
 # RUN: rm -rf %t; mkdir %t
 # RUN: sed s/SYM/_main/   %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o
 # RUN: sed s/SYM/_unused/ %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/unused.o
 
-# RUN: cd %t; llvm-ar rcsT unused.a unused.o
+# RUN: cd %t; llvm-ar rcsT unused.a unused.o; rm unused.o
 ## FIXME: Absolute paths don't end up relativized in the repro file.
 
-# RUN: %lld %t/main.o %t/unused.a -ObjC --reproduce=%t/repro.tar -o /dev/null
-# RUN: cd %t; tar xf repro.tar
-# RUN: cd %t/repro; %lld @response.txt
+# RUN: %no-fatal-warnings-lld %t/main.o %t/unused.a -ObjC -o /dev/null 2>&1 \
+# RUN:                      | FileCheck %s --check-prefix=WARN
+
+# RUN: %lld %t/main.o %t/unused.a -ObjC --no-warn-thin-archive-missing-members -o /dev/null \
+# RUN:    | FileCheck %s --implicit-check-not 'warning' --allow-empty
+
+# WARN: ld64.lld: warning: {{.*}}unused.a: -ObjC failed to open archive member: 'unused.o'
 
 .text
 .globl SYM
diff --git a/lld/test/MachO/reproduce-thin-archives.s b/lld/test/MachO/reproduce-thin-archives.s
index 9dee3f400e06a..33eeaede7aa41 100644
--- a/lld/test/MachO/reproduce-thin-archives.s
+++ b/lld/test/MachO/reproduce-thin-archives.s
@@ -1,10 +1,11 @@
 # REQUIRES: x86
 
-# RUN: rm -rf %t.dir
-# RUN: mkdir -p %t.dir
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.dir/foo.o
+# RUN: rm -rf %t.dir; split-file %s %t.dir
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t.dir/foo.s -o %t.dir/foo.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t.dir/unused.s -o %t.dir/unused.o
 # RUN: cd %t.dir
-# RUN: llvm-ar rcsT foo.a foo.o
+# RUN: llvm-ar rcsT foo.a foo.o unused.o
 
 # RUN: %lld foo.a -o /dev/null --reproduce repro.tar
 # RUN: tar tf repro.tar | FileCheck -DPATH='repro/%:t.dir' %s
@@ -12,9 +13,19 @@
 # RUN: %lld -all_load foo.a -o /dev/null --reproduce repro2.tar
 # RUN: tar tf repro2.tar | FileCheck -DPATH='repro2/%:t.dir' %s
 
+# RUN: %lld -ObjC foo.a -o /dev/null --reproduce repro3.tar
+# RUN: tar tf repro3.tar | FileCheck -DPATH='repro3/%:t.dir' %s
+
 # CHECK-DAG: [[PATH]]/foo.a
 # CHECK-DAG: [[PATH]]/foo.o
+# CHECK-DAG: [[PATH]]/unused.o
 
+#--- foo.s
 .globl _main
 _main:
   nop
+
+#--- unused.s
+.globl _unused
+_unused:
+  nop



More information about the llvm-commits mailing list