[lld] 5ba9063 - [lld-macho] Fixed crashes when linking with incompatible-arch archives/

Vy Nguyen via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 4 06:26:14 PDT 2023


Author: Vy Nguyen
Date: 2023-08-04T09:25:59-04:00
New Revision: 5ba906327b0178b7635baa89710eb2967db1aa18

URL: https://github.com/llvm/llvm-project/commit/5ba906327b0178b7635baa89710eb2967db1aa18
DIFF: https://github.com/llvm/llvm-project/commit/5ba906327b0178b7635baa89710eb2967db1aa18.diff

LOG: [lld-macho] Fixed crashes when linking with incompatible-arch archives/

Two changes:
 - Avoid crashing in predicate functions.
   Querying the property of the Symbols via these is*() functions shouldn't crash the program - the answer should just be "false".
   Currently, having them throw UNREACHABLE already (incorrectly) crash certain code paths involving macho::validateSymbolRelocation() .

 - Simply ignore input archives with incompatible arch (changes from PRESIDENT810@)

Differential Revision: https://reviews.llvm.org/D156468

Added: 
    lld/test/MachO/ignore-incompat-arch.s

Modified: 
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/Symbols.h
    lld/test/MachO/objc.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index c89f6f4722dccc..15902d0e3eefd2 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -185,6 +185,27 @@ static bool checkCompatibility(const InputFile *input) {
   return true;
 }
 
+template <class Header>
+static bool compatWithTargetArch(const InputFile *file, const Header *hdr) {
+  uint32_t cpuType;
+  std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(config->arch());
+
+  if (hdr->cputype != cpuType) {
+    Architecture arch =
+        getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
+    auto msg = config->errorForArchMismatch
+                   ? static_cast<void (*)(const Twine &)>(error)
+                   : warn;
+
+    msg(toString(file) + " has architecture " + getArchitectureName(arch) +
+        " which is incompatible with target architecture " +
+        getArchitectureName(config->arch()));
+    return false;
+  }
+
+  return checkCompatibility(file);
+}
+
 // This cache mostly exists to store system libraries (and .tbds) as they're
 // loaded, rather than the input archives, which are already cached at a higher
 // level, and other files like the filelist that are only read once.
@@ -930,9 +951,10 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
 }
 
 ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
-                 bool lazy, bool forceHidden)
+                 bool lazy, bool forceHidden, bool compatArch)
     : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden) {
   this->archiveName = std::string(archiveName);
+  this->compatArch = compatArch;
   if (lazy) {
     if (target->wordSize == 8)
       parseLazy<LP64>();
@@ -955,21 +977,10 @@ template <class LP> void ObjFile::parse() {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
 
-  uint32_t cpuType;
-  std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(config->arch());
-  if (hdr->cputype != cpuType) {
-    Architecture arch =
-        getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
-    auto msg = config->errorForArchMismatch
-                   ? static_cast<void (*)(const Twine &)>(error)
-                   : warn;
-    msg(toString(this) + " has architecture " + getArchitectureName(arch) +
-        " which is incompatible with target architecture " +
-        getArchitectureName(config->arch()));
+  // If we've already checked the arch, then don't need to check again.
+  if (!compatArch)
     return;
-  }
-
-  if (!checkCompatibility(this))
+  if (!(compatArch = compatWithTargetArch(this, hdr)))
     return;
 
   for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
@@ -1026,6 +1037,12 @@ template <class LP> void ObjFile::parseLazy() {
 
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
+
+  if (!compatArch)
+    return;
+  if (!(compatArch = compatWithTargetArch(this, hdr)))
+    return;
+
   const load_command *cmd = findCommand(hdr, LC_SYMTAB);
   if (!cmd)
     return;
@@ -2089,22 +2106,45 @@ ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
       forceHidden(forceHidden) {}
 
 void ArchiveFile::addLazySymbols() {
+  Error err = Error::success();
+  Expected<MemoryBufferRef> mbOrErr =
+      this->file->child_begin(err)->getMemoryBufferRef();
+
+  // Ignore the I/O error here - will be reported later.
+  if (!mbOrErr) {
+    llvm::consumeError(mbOrErr.takeError());
+  } else if (!err) {
+    if (identify_magic(mbOrErr->getBuffer()) == file_magic::macho_object) {
+      if (target->wordSize == 8)
+        compatArch = compatWithTargetArch(
+            this, reinterpret_cast<const LP64::mach_header *>(
+                      mbOrErr->getBufferStart()));
+      else
+        compatArch = compatWithTargetArch(
+            this, reinterpret_cast<const ILP32::mach_header *>(
+                      mbOrErr->getBufferStart()));
+
+      if (!compatArch)
+        return;
+    }
+  }
   for (const object::Archive::Symbol &sym : file->symbols())
     symtab->addLazyArchive(sym.getName(), this, sym);
 }
 
 static Expected<InputFile *>
 loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
-                  uint64_t offsetInArchive, bool forceHidden) {
+                  uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
   if (config->zeroModTime)
     modTime = 0;
 
   switch (identify_magic(mb.getBuffer())) {
   case file_magic::macho_object:
-    return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden);
+    return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden,
+                         compatArch);
   case file_magic::bitcode:
     return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
-                             forceHidden);
+                             forceHidden, compatArch);
   default:
     return createStringError(inconvertibleErrorCode(),
                              mb.getBufferIdentifier() +
@@ -2128,8 +2168,9 @@ Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
   if (!modTime)
     return modTime.takeError();
 
-  Expected<InputFile *> file = loadArchiveMember(
-      *mb, toTimeT(*modTime), getName(), c.getChildOffset(), forceHidden);
+  Expected<InputFile *> file =
+      loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
+                        forceHidden, compatArch);
 
   if (!file)
     return file.takeError();
@@ -2192,13 +2233,20 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
 }
 
 BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
-                         uint64_t offsetInArchive, bool lazy, bool forceHidden)
+                         uint64_t offsetInArchive, bool lazy, bool forceHidden,
+                         bool compatArch)
     : InputFile(BitcodeKind, mb, lazy), forceHidden(forceHidden) {
   this->archiveName = std::string(archiveName);
+  this->compatArch = compatArch;
   std::string path = mb.getBufferIdentifier().str();
   if (config->thinLTOIndexOnly)
     path = replaceThinLTOSuffix(mb.getBufferIdentifier());
 
+  // If the parent archive already determines that the arch is not compat with
+  // target, then just return.
+  if (!compatArch)
+    return;
+
   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
   // name. If two members with the same name are provided, this causes a
   // collision and ThinLTO can't proceed.

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 66d46e46fa73fd..efe5974d78ef72 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -140,6 +140,9 @@ class InputFile {
 
   InputFile(Kind, const llvm::MachO::InterfaceFile &);
 
+  // If true, this input's arch is compatiable with target.
+  bool compatArch = true;
+
 private:
   const Kind fileKind;
   const StringRef name;
@@ -157,7 +160,7 @@ struct FDE {
 class ObjFile final : public InputFile {
 public:
   ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
-          bool lazy = false, bool forceHidden = false);
+          bool lazy = false, bool forceHidden = false, bool compatArch = true);
   ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
   ArrayRef<uint8_t> getOptimizationHints() const;
   template <class LP> void parse();
@@ -301,7 +304,7 @@ class BitcodeFile final : public InputFile {
 public:
   explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                        uint64_t offsetInArchive, bool lazy = false,
-                       bool forceHidden = false);
+                       bool forceHidden = false, bool compatArch = true);
   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
   void parse();
 

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 88efc7a18c7688..f30294dbea9cb2 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -58,14 +58,14 @@ class Symbol {
 
   virtual uint64_t getVA() const { return 0; }
 
-  virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
+  virtual bool isWeakDef() const { return false; }
 
   // Only undefined or dylib symbols can be weak references. A weak reference
   // need not be satisfied at runtime, e.g. due to the symbol not being
   // available on a given target platform.
   virtual bool isWeakRef() const { return false; }
 
-  virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
+  virtual bool isTlv() const { return false; }
 
   // Whether this symbol is in the GOT or TLVPointer sections.
   bool isInGot() const { return gotIndex != UINT32_MAX; }

diff  --git a/lld/test/MachO/ignore-incompat-arch.s b/lld/test/MachO/ignore-incompat-arch.s
new file mode 100644
index 00000000000000..79c7cc7db34cd4
--- /dev/null
+++ b/lld/test/MachO/ignore-incompat-arch.s
@@ -0,0 +1,71 @@
+## Test that LLD correctly ignored archives with incompatible architecture without crashing.
+
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos  %t/callee.s -o %t/callee_arm64.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos  %t/callee.s -o %t/callee_x86_64.o
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos  %t/caller.s -o %t/caller_arm64.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos  %t/caller.s -o %t/caller_x86_64.o
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos  %t/main.s -o %t/main_arm64.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos  %t/main.s -o %t/main_x86_64.o
+
+# RUN: llvm-ar rc %t/libcallee_arm64.a %t/callee_arm64.o
+# RUN: llvm-ar r %t/libcallee_x86.a %t/callee_x86_64.o
+
+# RUN: llvm-ar r %t/libcaller_arm64.a %t/caller_arm64.o
+# RUN: llvm-ar r %t/libcaller_x86.a %t/caller_x86_64.o
+
+## Symbol from the arm64 archive should be ignored even tho it appears before the x86 archive.
+# RUN: %no-fatal-warnings-lld -map %t/x86_a.map -arch x86_64 %t/main_x86_64.o %t/libcallee_arm64.a %t/libcallee_x86.a %t/libcaller_x86.a -o %t/x86_a.out 2>&1 \
+# RUN:     | FileCheck -check-prefix=X86-WARNING %s
+
+# RUN: %no-fatal-warnings-lld -map %t/x86_b.map -arch x86_64 %t/main_x86_64.o %t/libcallee_x86.a %t/libcallee_arm64.a %t/libcaller_x86.a -o %t/x86_b.out 2>&1 \
+# RUN:     | FileCheck -check-prefix=X86-WARNING %s
+
+# RUN: %no-fatal-warnings-lld -map %t/arm64_a.map -arch arm64 %t/main_arm64.o %t/libcallee_x86.a %t/libcallee_arm64.a %t/libcaller_arm64.a -o %t/arm64_a.out 2>&1 \
+# RUN:     | FileCheck -check-prefix=ARM64-WARNING %s
+
+# RUN: %no-fatal-warnings-lld -map %t/arm64_b.map -arch arm64 %t/main_arm64.o %t/libcallee_arm64.a %t/libcallee_x86.a %t/libcaller_arm64.a -o %t/arm64_b.out 2>&1 \
+# RUN:     | FileCheck -check-prefix=ARM64-WARNING %s
+
+## Verify that the output doesn't take any symbol from the in-compat archive
+# RUN: FileCheck --check-prefix=SYM-X86 %s --input-file=%t/x86_a.map
+# RUN: FileCheck --check-prefix=SYM-X86 %s --input-file=%t/x86_b.map
+
+# RUN: FileCheck --check-prefix=SYM-ARM64 %s --input-file=%t/arm64_a.map
+# RUN: FileCheck --check-prefix=SYM-ARM64 %s --input-file=%t/arm64_b.map
+
+
+# X86-WARNING: libcallee_arm64.a has architecture arm64 which is incompatible with target architecture x86_64
+
+# ARM64-WARNING: libcallee_x86.a has architecture x86_64 which is incompatible with target architecture arm64
+
+# SYM-X86-NOT: libcallee_arm64.a
+# SYM-X86: {{.+}}main_x86_64.o
+# SYM-X86: {{.+}}libcallee_x86.a(callee_x86_64.o)
+# SYM-X86: {{.+}}libcaller_x86.a(caller_x86_64.o)
+
+# SYM-ARM64-NOT: libcallee_x86.a
+# SYM-ARM64: {{.+}}main_arm64.o
+# SYM-ARM64: {{.+}}libcallee_arm64.a(callee_arm64.o)
+# SYM-ARM64: {{.+}}libcaller_arm64.a(caller_arm64.o)
+
+
+#--- callee.s
+.globl _callee
+_callee:
+  ret
+
+#--- caller.s
+.globl _caller
+_caller:
+  .quad _callee
+  ret
+
+#--- main.s
+.globl _main
+_main:
+  .quad _caller
+  ret

diff  --git a/lld/test/MachO/objc.s b/lld/test/MachO/objc.s
index f79789c0e5a7b1..e7074141f0113f 100644
--- a/lld/test/MachO/objc.s
+++ b/lld/test/MachO/objc.s
@@ -14,15 +14,19 @@
 
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
 
-# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC -ObjC
+# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC -ObjC 
 # RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC
 
 # RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC2 -ObjC
 # RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC
 
-# RUN: %lld -lSystem %t/test.o -o %t/test --start-lib %t/no-objc.o %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/has-swift-proto.o %t/wrong-arch.o --end-lib -ObjC
+# RUN: %no-fatal-warnings-lld -lSystem %t/test.o -o %t/test --start-lib %t/no-objc.o %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/has-swift-proto.o %t/wrong-arch.o --end-lib -ObjC 2>&1 \
+# RUN:     | FileCheck -check-prefix=WARNING %s
 # RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC
 
+# WARNING: {{.+}}wrong-arch.o has architecture armv7 which is incompatible with target architecture x86_64
+# WARNING-NOT: {{.}}
+
 # OBJC:       Sections:
 # OBJC-NEXT:  Idx Name            Size   VMA  Type
 # OBJC-NEXT:    0 __text          {{.*}}      TEXT


        


More information about the llvm-commits mailing list