[lld] a033184 - [lld-macho] Stricter Bitcode Symbol Resolution
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 22 12:03:44 PDT 2023
Author: Kyungwoo Lee
Date: 2023-08-22T12:03:17-07:00
New Revision: a033184abb80b84c1fa6c2bc6a915c24f56b40f1
URL: https://github.com/llvm/llvm-project/commit/a033184abb80b84c1fa6c2bc6a915c24f56b40f1
DIFF: https://github.com/llvm/llvm-project/commit/a033184abb80b84c1fa6c2bc6a915c24f56b40f1.diff
LOG: [lld-macho] Stricter Bitcode Symbol Resolution
LLD resolves symbols before performing LTO compilation, assuming that the symbols in question are resolved by the resulting object files from LTO. However, there is a scenario where the prevailing symbols might be resolved incorrectly due to specific assembly symbols not appearing in the symbol table of the bitcode. This patch deals with such a scenario by generating an error instead of silently allowing a mis-linkage.
If a prevailing symbol is resolved through post-loaded archives via LC linker options, a warning will now be issued.
Reviewed By: #lld-macho, thevinster
Differential Revision: https://reviews.llvm.org/D158003
Added:
lld/test/MachO/symbol-resolution-error.ll
lld/test/MachO/symbol-resolution-warning.ll
Modified:
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/LTO.cpp
lld/MachO/SymbolTable.cpp
Removed:
################################################################################
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 931d248d016041..a882c96dc221c5 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -964,8 +964,10 @@ void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) {
SmallVector<StringRef> macho::unprocessedLCLinkerOptions;
ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
- bool lazy, bool forceHidden, bool compatArch)
- : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden) {
+ bool lazy, bool forceHidden, bool compatArch,
+ bool builtFromBitcode)
+ : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden),
+ builtFromBitcode(builtFromBitcode) {
this->archiveName = std::string(archiveName);
this->compatArch = compatArch;
if (lazy) {
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 7a03cef7121632..2e37e7ba5a0060 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -160,7 +160,8 @@ struct FDE {
class ObjFile final : public InputFile {
public:
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
- bool lazy = false, bool forceHidden = false, bool compatArch = true);
+ bool lazy = false, bool forceHidden = false, bool compatArch = true,
+ bool builtFromBitcode = false);
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
ArrayRef<uint8_t> getOptimizationHints() const;
template <class LP> void parse();
@@ -179,6 +180,7 @@ class ObjFile final : public InputFile {
Section *addrSigSection = nullptr;
const uint32_t modTime;
bool forceHidden;
+ bool builtFromBitcode;
std::vector<ConcatInputSection *> debugSections;
std::vector<CallGraphEntry> callGraph;
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index fdae7e4bd1b7bc..7a852f48dde84b 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -305,7 +305,9 @@ std::vector<ObjFile *> BitcodeCompiler::compile() {
modTime = getModTime(filePath);
}
ret.push_back(make<ObjFile>(
- MemoryBufferRef(objBuf, saver().save(filePath.str())), modTime, ""));
+ MemoryBufferRef(objBuf, saver().save(filePath.str())), modTime,
+ /*archiveName=*/"", /*lazy=*/false,
+ /*forceHidden=*/false, /*compatArch=*/true, /*builtFromBitcode=*/true));
}
return ret;
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 283edf24ab067e..825242f2cc72ff 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -150,10 +150,48 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
overridesWeakDef = !isWeakDef && dysym->isWeakDef();
dysym->unreference();
} else if (auto *undef = dyn_cast<Undefined>(s)) {
- // Preserve the original bitcode file name (instead of using the object
- // file name).
- if (undef->wasBitcodeSymbol)
- file = undef->getFile();
+ if (undef->wasBitcodeSymbol) {
+ auto objFile = dyn_cast<ObjFile>(file);
+ if (!objFile) {
+ // The file must be a native object file, as opposed to potentially
+ // being another bitcode file. A situation arises when some symbols
+ // are defined thru `module asm` and thus they are not present in the
+ // bitcode's symbol table. Consider bitcode modules `A`, `B`, and `C`.
+ // LTO compiles only `A` and `C`, since there's no explicit symbol
+ // reference to `B` other than a symbol from `A` via `module asm`.
+ // After LTO is finished, the missing symbol now appears in the
+ // resulting object file for `A`, which prematurely resolves another
+ // prevailing symbol with `B` that hasn't been compiled, instead of
+ // the resulting object for `C`. Consequently, an incorrect
+ // relocation is generated for the prevailing symbol.
+ assert(isa<BitcodeFile>(file) && "Bitcode file is expected.");
+ std::string message =
+ "The pending prevailing symbol(" + name.str() +
+ ") in the bitcode file(" + toString(undef->getFile()) +
+ ") is overridden by a non-native object (from bitcode): " +
+ toString(file);
+ error(message);
+ } else if (!objFile->builtFromBitcode) {
+ // Ideally, this should be an object file compiled from a bitcode
+ // file. However, this might not hold true if a LC linker option is
+ // used. In case LTO internalizes a prevailing hidden weak symbol,
+ // there's a situation where an unresolved prevailing symbol might be
+ // linked with the corresponding one from a native library, which is
+ // loaded later after LTO. Although this could potentially result in
+ // an ODR violation, we choose to permit this scenario as a warning.
+ std::string message = "The pending prevailing symbol(" + name.str() +
+ ") in the bitcode file(" +
+ toString(undef->getFile()) +
+ ") is overridden by a post-processed native "
+ "object (from native archive): " +
+ toString(file);
+ warn(message);
+ } else {
+ // Preserve the original bitcode file name (instead of using the
+ // object file name).
+ file = undef->getFile();
+ }
+ }
}
// Defined symbols take priority over other types of symbols, so in case
// of a name conflict, we fall through to the replaceSymbol() call below.
diff --git a/lld/test/MachO/symbol-resolution-error.ll b/lld/test/MachO/symbol-resolution-error.ll
new file mode 100644
index 00000000000000..12a6f5cbc97e47
--- /dev/null
+++ b/lld/test/MachO/symbol-resolution-error.ll
@@ -0,0 +1,63 @@
+; REQUIRES: x86
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt --thinlto-bc %t/f.ll -o %t/f.o
+; RUN: llvm-ar rcs %t/libf.a %t/f.o
+; RUN: opt --thinlto-bc %t/q.ll -o %t/q.o
+; RUN: llvm-ar rcs %t/libq.a %t/q.o
+; RUN: opt --thinlto-bc %t/b.ll -o %t/b.o
+; RUN: llvm-ar rcs %t/libb.a %t/b.o
+; RUN: opt --thinlto-bc %t/m.ll -o %t/m.o
+
+; RUN: not %no-fatal-warnings-lld -lSystem %t/libf.a %t/libq.a %t/libb.a %t/m.o -o %t/test.out 2>&1 | FileCheck %s
+
+; We can't read symbols that are set by `module asm` in the bitcode's symbol table.
+; LTO compiles only f.ll, b.ll, and m.ll without q.ll due to missing reference to `q_asm` in f.ll.
+; However, later `q_asm` appears in the resulting object for `f.ll`, which triggers resolving
+; a ODR symbol `q_odr` from `q.ll,` as opposed to one expected from `b.ll`.
+; As a result, we have invalid relocation for `q_odr` as `q.ll` is neither compiled, nor linked.
+
+; CHECK: error: The pending prevailing symbol(_q_odr) in the bitcode file({{.*}}libb.a(b.o)) is overridden by a non-native object (from bitcode): {{.*}}libq.a(q.o)
+
+;--- f.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+module asm ".no_dead_strip _q_asm"
+
+define i64 @f() {
+ ret i64 1
+}
+
+;--- q.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+ at q_asm = global i64 1
+
+ at q_odr = linkonce_odr hidden unnamed_addr constant i64 0
+
+
+;--- b.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+ at q_odr = linkonce_odr hidden unnamed_addr constant i64 1
+
+define i64 @b() {
+ ret i64 ptrtoint (i64* @q_odr to i64)
+}
+
+;--- m.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+declare i64 @f()
+declare i64 @b()
+
+define i64 @main() {
+ %call1 = call i64 @f()
+ %call2 = call i64 @b()
+ %add = add nsw i64 %call1, %call2
+ ret i64 %add
+}
diff --git a/lld/test/MachO/symbol-resolution-warning.ll b/lld/test/MachO/symbol-resolution-warning.ll
new file mode 100644
index 00000000000000..cf8992dc289c07
--- /dev/null
+++ b/lld/test/MachO/symbol-resolution-warning.ll
@@ -0,0 +1,61 @@
+; REQUIRES: x86
+; RUN: rm -rf %t; split-file %s %t
+
+; RUN: opt %t/f.ll -o %t/f.o
+; RUN: llvm-ar rcs %t/libf.a %t/f.o
+; RUN: llc -filetype=obj %t/q.ll -o %t/q.o
+; RUN: llvm-ar rcs %t/libq.a %t/q.o
+; RUN: llc -filetype=obj %t/m.ll -o %t/m.o
+
+; RUN: %no-fatal-warnings-lld -dylib -lSystem -L%t %t/libf.a %t/m.o -o %t/test.out 2>&1 | FileCheck %s
+
+; We can't read symbols that are set by module asm in the bitcode's symbol table.
+; LTO internalizes `odr` in `f.ll`, and thus the prevailing `odr` remains unresolved.
+; `q.ll` is loaded after LTO is finished during post-processing LC linker options.
+; The unresolved prevailing `odr` is now resolved with `q.ll`.
+
+; CHECK: warning: The pending prevailing symbol(_odr) in the bitcode file({{.*}}libf.a(f.o)) is overridden by a post-processed native object (from native archive): {{.*}}libq.a(q.o)
+
+;--- f.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+!0 = !{!"-lq"}
+!llvm.linker.options = !{!0}
+
+define i64 @f() {
+ %1 = call i64 @odr()
+ %2 = call i64 @q()
+ %3 = add i64 %1, %2
+ ret i64 %3
+}
+
+define weak hidden i64 @odr() noinline {
+ ret i64 1
+}
+
+declare i64 @q()
+
+;--- q.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+define i64 @q() {
+ %1 = call i64 @odr()
+ ret i64 %1
+}
+
+define linkonce_odr hidden i64 @odr() noinline {
+ ret i64 2
+}
+
+;--- m.ll
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+declare i64 @f()
+
+define i64 @m() {
+ %1 = call i64 @f()
+ ret i64 %1
+}
More information about the llvm-commits
mailing list