[lld] a033184 - [lld-macho] Stricter Bitcode Symbol Resolution

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 22 12:03:44 PDT 2023


Author: Kyungwoo Lee
Date: 2023-08-22T12:03:17-07:00
New Revision: a033184abb80b84c1fa6c2bc6a915c24f56b40f1

URL: https://github.com/llvm/llvm-project/commit/a033184abb80b84c1fa6c2bc6a915c24f56b40f1
DIFF: https://github.com/llvm/llvm-project/commit/a033184abb80b84c1fa6c2bc6a915c24f56b40f1.diff

LOG: [lld-macho] Stricter Bitcode Symbol Resolution

LLD resolves symbols before performing LTO compilation, assuming that the symbols in question are resolved by the resulting object files from LTO. However, there is a scenario where the prevailing symbols might be resolved incorrectly due to specific assembly symbols not appearing in the symbol table of the bitcode. This patch deals with such a scenario by generating an error instead of silently allowing a mis-linkage.
If a prevailing symbol is resolved through post-loaded archives via LC linker options, a warning will now be issued.

Reviewed By: #lld-macho, thevinster

Differential Revision: https://reviews.llvm.org/D158003

Added: 
    lld/test/MachO/symbol-resolution-error.ll
    lld/test/MachO/symbol-resolution-warning.ll

Modified: 
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/LTO.cpp
    lld/MachO/SymbolTable.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 931d248d016041..a882c96dc221c5 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -964,8 +964,10 @@ void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
 
 SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
 ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
-                 bool lazy, bool forceHidden, bool compatArch)
-    : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden) {
+                 bool lazy, bool forceHidden, bool compatArch,
+                 bool builtFromBitcode)
+    : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
+      builtFromBitcode(builtFromBitcode) {
   this->archiveName = std::string(archiveName);
   this->compatArch = compatArch;
   if (lazy) {

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 7a03cef7121632..2e37e7ba5a0060 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -160,7 +160,8 @@ struct FDE {
 class ObjFile final : public InputFile {
 public:
   ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
-          bool lazy = false, bool forceHidden = false, bool compatArch = true);
+          bool lazy = false, bool forceHidden = false, bool compatArch = true,
+          bool builtFromBitcode = false);
   ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
   ArrayRef<uint8_t> getOptimizationHints() const;
   template <class LP> void parse();
@@ -179,6 +180,7 @@ class ObjFile final : public InputFile {
   Section *addrSigSection = nullptr;
   const uint32_t modTime;
   bool forceHidden;
+  bool builtFromBitcode;
   std::vector<ConcatInputSection *> debugSections;
   std::vector<CallGraphEntry> callGraph;
   llvm::DenseMap<ConcatInputSection *, FDE> fdes;

diff  --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index fdae7e4bd1b7bc..7a852f48dde84b 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -305,7 +305,9 @@ std::vector<ObjFile *> BitcodeCompiler::compile() {
       modTime = getModTime(filePath);
     }
     ret.push_back(make<ObjFile>(
-        MemoryBufferRef(objBuf, saver().save(filePath.str())), modTime, ""));
+        MemoryBufferRef(objBuf, saver().save(filePath.str())), modTime,
+        /*archiveName=*/"", /*lazy=*/false,
+        /*forceHidden=*/false, /*compatArch=*/true, /*builtFromBitcode=*/true));
   }
 
   return ret;

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 283edf24ab067e..825242f2cc72ff 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -150,10 +150,48 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
       overridesWeakDef = !isWeakDef && dysym->isWeakDef();
       dysym->unreference();
     } else if (auto *undef = dyn_cast<Undefined>(s)) {
-      // Preserve the original bitcode file name (instead of using the object
-      // file name).
-      if (undef->wasBitcodeSymbol)
-        file = undef->getFile();
+      if (undef->wasBitcodeSymbol) {
+        auto objFile = dyn_cast<ObjFile>(file);
+        if (!objFile) {
+          // The file must be a native object file, as opposed to potentially
+          // being another bitcode file. A situation arises when some symbols
+          // are defined thru `module asm` and thus they are not present in the
+          // bitcode's symbol table. Consider bitcode modules `A`, `B`, and `C`.
+          // LTO compiles only `A` and `C`, since there's no explicit symbol
+          // reference to `B` other than a symbol from `A` via `module asm`.
+          // After LTO is finished, the missing symbol now appears in the
+          // resulting object file for `A`, which  prematurely resolves another
+          // prevailing symbol with `B` that hasn't been compiled, instead of
+          // the resulting object for `C`. Consequently, an incorrect
+          // relocation is generated for the prevailing symbol.
+          assert(isa<BitcodeFile>(file) && "Bitcode file is expected.");
+          std::string message =
+              "The pending prevailing symbol(" + name.str() +
+              ") in the bitcode file(" + toString(undef->getFile()) +
+              ") is overridden by a non-native object (from bitcode): " +
+              toString(file);
+          error(message);
+        } else if (!objFile->builtFromBitcode) {
+          // Ideally, this should be an object file compiled from a bitcode
+          // file. However, this might not hold true if a LC linker option is
+          // used. In case LTO internalizes a prevailing hidden weak symbol,
+          // there's a situation where an unresolved prevailing symbol might be
+          // linked with the corresponding one from a native library, which is
+          // loaded later after LTO. Although this could potentially result in
+          // an ODR violation, we choose to permit this scenario as a warning.
+          std::string message = "The pending prevailing symbol(" + name.str() +
+                                ") in the bitcode file(" +
+                                toString(undef->getFile()) +
+                                ") is overridden by a post-processed native "
+                                "object (from native archive): " +
+                                toString(file);
+          warn(message);
+        } else {
+          // Preserve the original bitcode file name (instead of using the
+          // object file name).
+          file = undef->getFile();
+        }
+      }
     }
     // Defined symbols take priority over other types of symbols, so in case
     // of a name conflict, we fall through to the replaceSymbol() call below.

diff  --git a/lld/test/MachO/symbol-resolution-error.ll b/lld/test/MachO/symbol-resolution-error.ll
new file mode 100644
index 00000000000000..12a6f5cbc97e47
--- /dev/null
+++ b/lld/test/MachO/symbol-resolution-error.ll
@@ -0,0 +1,63 @@
+; REQUIRES: x86
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt --thinlto-bc %t/f.ll -o %t/f.o
+; RUN: llvm-ar rcs %t/libf.a %t/f.o
+; RUN: opt --thinlto-bc %t/q.ll -o %t/q.o
+; RUN: llvm-ar rcs %t/libq.a %t/q.o
+; RUN: opt --thinlto-bc %t/b.ll -o %t/b.o
+; RUN: llvm-ar rcs %t/libb.a %t/b.o
+; RUN: opt --thinlto-bc %t/m.ll -o %t/m.o
+
+; RUN: not %no-fatal-warnings-lld -lSystem %t/libf.a %t/libq.a %t/libb.a %t/m.o -o %t/test.out 2>&1 | FileCheck %s
+
+; We can't read symbols that are set by `module asm` in the bitcode's symbol table.
+; LTO compiles only f.ll, b.ll, and m.ll without q.ll due to missing reference to `q_asm` in f.ll.
+; However, later `q_asm` appears in the resulting object for `f.ll`, which triggers resolving
+; a ODR symbol `q_odr` from `q.ll,` as opposed to one expected from `b.ll`.
+; As a result, we have invalid relocation for `q_odr` as `q.ll` is neither compiled, nor linked.
+
+; CHECK: error: The pending prevailing symbol(_q_odr) in the bitcode file({{.*}}libb.a(b.o)) is overridden by a non-native object (from bitcode): {{.*}}libq.a(q.o)
+
+;--- f.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+module asm ".no_dead_strip _q_asm"
+
+define i64 @f() {
+  ret i64 1
+}
+
+;--- q.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+ at q_asm = global i64 1
+
+ at q_odr = linkonce_odr hidden unnamed_addr constant i64 0
+
+
+;--- b.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+ at q_odr = linkonce_odr hidden unnamed_addr constant i64 1
+
+define i64 @b() {
+  ret i64 ptrtoint (i64* @q_odr to i64)
+}
+
+;--- m.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+declare i64 @f()
+declare i64 @b()
+
+define i64 @main() {
+  %call1 = call i64 @f()
+  %call2 = call i64 @b()
+  %add = add nsw i64 %call1, %call2
+  ret i64 %add
+}

diff  --git a/lld/test/MachO/symbol-resolution-warning.ll b/lld/test/MachO/symbol-resolution-warning.ll
new file mode 100644
index 00000000000000..cf8992dc289c07
--- /dev/null
+++ b/lld/test/MachO/symbol-resolution-warning.ll
@@ -0,0 +1,61 @@
+; REQUIRES: x86
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt %t/f.ll -o %t/f.o
+; RUN: llvm-ar rcs %t/libf.a %t/f.o
+; RUN: llc -filetype=obj %t/q.ll -o %t/q.o
+; RUN: llvm-ar rcs %t/libq.a %t/q.o
+; RUN: llc -filetype=obj %t/m.ll -o %t/m.o
+
+; RUN: %no-fatal-warnings-lld -dylib -lSystem -L%t %t/libf.a %t/m.o -o %t/test.out 2>&1 | FileCheck %s
+
+; We can't read symbols that are set by module asm in the bitcode's symbol table.
+; LTO internalizes `odr` in `f.ll`, and thus the prevailing `odr` remains unresolved.
+; `q.ll` is loaded after LTO is finished during post-processing LC linker options.
+; The unresolved prevailing `odr` is now resolved with `q.ll`.
+
+; CHECK: warning: The pending prevailing symbol(_odr) in the bitcode file({{.*}}libf.a(f.o)) is overridden by a post-processed native object (from native archive): {{.*}}libq.a(q.o)
+
+;--- f.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+!0 = !{!"-lq"}
+!llvm.linker.options = !{!0}
+
+define i64 @f()  {
+  %1 = call i64 @odr()
+  %2 = call i64 @q()
+  %3 = add i64 %1, %2
+  ret i64 %3
+}
+
+define weak hidden i64 @odr() noinline {
+  ret i64 1
+}
+
+declare i64 @q()
+
+;--- q.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+define i64 @q() {
+  %1 = call i64 @odr()
+  ret i64 %1
+}
+
+define linkonce_odr hidden i64 @odr() noinline {
+  ret i64 2
+}
+
+;--- m.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+declare i64 @f()
+
+define i64 @m() {
+  %1 = call i64 @f()
+  ret i64 %1
+}


        


More information about the llvm-commits mailing list