[lld] [llvm] [LLD][COFF] Make unresolved symbol search behavior compliant with MSVC link.exe (PR #85290)
Alexandre Ganea via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 14 11:30:17 PDT 2024
https://github.com/aganea updated https://github.com/llvm/llvm-project/pull/85290
>From d5c4adfb5e85e80758cb51a91a246630924c7472 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <aganea at havenstudios.com>
Date: Thu, 14 Mar 2024 13:31:51 -0400
Subject: [PATCH 1/3] [LLD][COFF] Align unresolved symbols search behavior with
MSVC link.exe
---
lld/COFF/Driver.cpp | 53 +++++++++---
lld/COFF/Driver.h | 21 +++--
lld/COFF/InputFiles.cpp | 13 +--
lld/COFF/InputFiles.h | 26 ++++--
lld/COFF/SymbolTable.cpp | 110 +++++++++++++++++++++++--
lld/COFF/Symbols.h | 11 +++
lld/test/COFF/duplicate-imp-func.s | 6 +-
lld/test/COFF/lib-searching-behavior.s | 67 +++++++++++++++
llvm/include/llvm/Support/Allocator.h | 28 ++++++-
9 files changed, 292 insertions(+), 43 deletions(-)
create mode 100644 lld/test/COFF/lib-searching-behavior.s
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 22ee2f133be98a..38e0392a876307 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -187,7 +187,8 @@ MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> mb) {
}
void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
- bool wholeArchive, bool lazy) {
+ bool wholeArchive, bool lazy,
+ ArchiveFile *parent) {
StringRef filename = mb->getBufferIdentifier();
MemoryBufferRef mbref = takeBuffer(std::move(mb));
@@ -213,11 +214,11 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
ctx.symtab.addFile(make<ArchiveFile>(ctx, mbref));
break;
case file_magic::bitcode:
- ctx.symtab.addFile(make<BitcodeFile>(ctx, mbref, "", 0, lazy));
+ ctx.symtab.addFile(make<BitcodeFile>(ctx, mbref, "", 0, lazy, parent));
break;
case file_magic::coff_object:
case file_magic::coff_import_library:
- ctx.symtab.addFile(make<ObjFile>(ctx, mbref, lazy));
+ ctx.symtab.addFile(make<ObjFile>(ctx, mbref, lazy, parent));
break;
case file_magic::pdb:
ctx.symtab.addFile(make<PDBInputFile>(ctx, mbref));
@@ -242,7 +243,9 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
}
}
-void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) {
+void LinkerDriver::enqueuePath(
+ StringRef path, bool wholeArchive, bool lazy,
+ std::optional<std::shared_future<ArchiveFile *>> parent) {
auto future = std::make_shared<std::future<MBErrPair>>(
createFutureForFile(std::string(path)));
std::string pathStr = std::string(path);
@@ -281,13 +284,15 @@ void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) {
else
error(msg + "; did you mean '" + nearest + "'");
} else
- ctx.driver.addBuffer(std::move(mb), wholeArchive, lazy);
+ ctx.driver.addBuffer(std::move(mb), wholeArchive, lazy,
+ parent ? parent->get() : nullptr);
});
}
void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName,
StringRef parentName,
- uint64_t offsetInArchive) {
+ uint64_t offsetInArchive,
+ ArchiveFile *parent) {
file_magic magic = identify_magic(mb.getBuffer());
if (magic == file_magic::coff_import_library) {
InputFile *imp = make<ImportFile>(ctx, mb);
@@ -298,10 +303,10 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName,
InputFile *obj;
if (magic == file_magic::coff_object) {
- obj = make<ObjFile>(ctx, mb);
+ obj = make<ObjFile>(ctx, mb, /*lazy=*/false, parent);
} else if (magic == file_magic::bitcode) {
- obj =
- make<BitcodeFile>(ctx, mb, parentName, offsetInArchive, /*lazy=*/false);
+ obj = make<BitcodeFile>(ctx, mb, parentName, offsetInArchive,
+ /*lazy=*/false, parent);
} else if (magic == file_magic::coff_cl_gl_object) {
error(mb.getBufferIdentifier() +
": is not a native COFF file. Recompile without /GL?");
@@ -318,7 +323,8 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName,
void LinkerDriver::enqueueArchiveMember(const Archive::Child &c,
const Archive::Symbol &sym,
- StringRef parentName) {
+ StringRef parentName,
+ ArchiveFile *parent) {
auto reportBufferError = [=](Error &&e, StringRef childName) {
fatal("could not get the buffer for the member defining symbol " +
@@ -335,7 +341,7 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c,
enqueueTask([=]() {
llvm::TimeTraceScope timeScope("Archive: ", mb.getBufferIdentifier());
ctx.driver.addArchiveBuffer(mb, toCOFFString(ctx, sym), parentName,
- offsetInArchive);
+ offsetInArchive, parent);
});
return;
}
@@ -356,7 +362,15 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c,
// used as the buffer identifier.
ctx.driver.addArchiveBuffer(takeBuffer(std::move(mbOrErr.first)),
toCOFFString(ctx, sym), "",
- /*OffsetInArchive=*/0);
+ /*OffsetInArchive=*/0, parent);
+ });
+}
+
+void LinkerDriver::enqueueLazyFile(InputFile *file) {
+ enqueueTask([=]() {
+ // Once it has been enqueued, it cannot be lazy anymore.
+ file->lazy = false;
+ ctx.symtab.addFile(file);
});
}
@@ -2111,17 +2125,30 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
{
llvm::TimeTraceScope timeScope2("Parse & queue inputs");
bool inLib = false;
+ std::optional<std::shared_future<ArchiveFile *>> inLibArchive;
for (auto *arg : args) {
switch (arg->getOption().getID()) {
case OPT_end_lib:
if (!inLib)
error("stray " + arg->getSpelling());
inLib = false;
+ inLibArchive = std::nullopt;
break;
case OPT_start_lib:
if (inLib)
error("nested " + arg->getSpelling());
inLib = true;
+ // It is important to create a fake archive here so that we remember its
+ // placement on the command-line. This will be later needed to resolve
+ // symbols in the archive order required by the MSVC specification.
+ {
+ auto a = std::make_shared<std::promise<ArchiveFile *>>();
+ inLibArchive = a->get_future().share();
+ enqueueTask([=] {
+ a->set_value(
+ make<ArchiveFile>(ctx, MemoryBufferRef({}, "<cmdline-lib>")));
+ });
+ }
break;
case OPT_wholearchive_file:
if (std::optional<StringRef> path = findFileIfNew(arg->getValue()))
@@ -2129,7 +2156,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
break;
case OPT_INPUT:
if (std::optional<StringRef> path = findFileIfNew(arg->getValue()))
- enqueuePath(*path, isWholeArchive(*path), inLib);
+ enqueuePath(*path, isWholeArchive(*path), inLib, inLibArchive);
break;
default:
// Ignore other options.
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index fa54de05befb58..da3c41e1bca734 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -22,6 +22,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/WindowsDriver/MSVCPaths.h"
+#include <future>
#include <memory>
#include <optional>
#include <set>
@@ -91,13 +92,20 @@ class LinkerDriver {
// Used by ArchiveFile to enqueue members.
void enqueueArchiveMember(const Archive::Child &c, const Archive::Symbol &sym,
- StringRef parentName);
+ StringRef parentName,
+ ArchiveFile *parent = nullptr);
- void enqueuePDB(StringRef Path) { enqueuePath(Path, false, false); }
+ void enqueuePDB(StringRef Path) {
+ enqueuePath(Path, false, false, /*parent=*/std::nullopt);
+ }
MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> mb);
- void enqueuePath(StringRef path, bool wholeArchive, bool lazy);
+ void enqueuePath(
+ StringRef path, bool wholeArchive, bool lazy,
+ std::optional<std::shared_future<ArchiveFile *>> parent = std::nullopt);
+
+ void enqueueLazyFile(InputFile *file);
std::unique_ptr<llvm::TarWriter> tar; // for /linkrepro
@@ -182,10 +190,11 @@ class LinkerDriver {
StringRef findDefaultEntry();
WindowsSubsystem inferSubsystem();
- void addBuffer(std::unique_ptr<MemoryBuffer> mb, bool wholeArchive,
- bool lazy);
+ void addBuffer(std::unique_ptr<MemoryBuffer> mb, bool wholeArchive, bool lazy,
+ ArchiveFile *parent = nullptr);
void addArchiveBuffer(MemoryBufferRef mbref, StringRef symName,
- StringRef parentName, uint64_t offsetInArchive);
+ StringRef parentName, uint64_t offsetInArchive,
+ ArchiveFile *parent = nullptr);
void enqueueTask(std::function<void()> task);
bool run();
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 037fae45242c6f..42cdd1cf3b6c2c 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -94,9 +94,12 @@ static bool ignoredSymbolName(StringRef name) {
}
ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
- : InputFile(ctx, ArchiveKind, m) {}
+ : InputFile(ctx, ArchiveKind, m, /*lazy=*/true) {
+ static unsigned Order = 0;
+ CmdLineIndex = Order++;
+}
-void ArchiveFile::parse() {
+void ArchiveFile::parseLazy() {
// Parse a MemoryBufferRef as an archive file.
file = CHECK(Archive::create(mb), this);
@@ -115,7 +118,7 @@ void ArchiveFile::addMember(const Archive::Symbol &sym) {
if (!seen.insert(c.getChildOffset()).second)
return;
- ctx.driver.enqueueArchiveMember(c, sym, getName());
+ ctx.driver.enqueueArchiveMember(c, sym, getName(), this);
}
std::vector<MemoryBufferRef> lld::coff::getArchiveMembers(Archive *file) {
@@ -1000,8 +1003,8 @@ void ImportFile::parse() {
BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
StringRef archiveName, uint64_t offsetInArchive,
- bool lazy)
- : InputFile(ctx, BitcodeKind, mb, lazy) {
+ bool lazy, ArchiveFile *parent)
+ : InputFile(ctx, BitcodeKind, mb, lazy), parent(parent) {
std::string path = mb.getBufferIdentifier().str();
if (ctx.config.thinLTOIndexOnly)
path = replaceThinLTOSuffix(mb.getBufferIdentifier(),
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 3b55cd791bfda2..7070f51fdf78ac 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -66,7 +66,6 @@ class InputFile {
enum Kind {
ArchiveKind,
ObjectKind,
- LazyObjectKind,
PDBKind,
ImportKind,
BitcodeKind,
@@ -105,7 +104,7 @@ class InputFile {
public:
// True if this is a lazy ObjFile or BitcodeFile.
- bool lazy = false;
+ bool lazy;
};
// .lib or .a file.
@@ -113,23 +112,30 @@ class ArchiveFile : public InputFile {
public:
explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m);
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
- void parse() override;
+ void parse() override{};
+ void parseLazy();
// Enqueues an archive member load for the given symbol. If we've already
// enqueued a load for the same archive member, this function does nothing,
// which ensures that we don't load the same member more than once.
void addMember(const Archive::Symbol &sym);
-private:
std::unique_ptr<Archive> file;
+
+ // The order this archive was seen on the cmd-line. This is later needed for
+ // resolving undefined symbols in archive OBJs.
+ uint32_t CmdLineIndex;
+
+private:
llvm::DenseSet<uint64_t> seen;
};
// .obj or .o file. This may be a member of an archive file.
class ObjFile : public InputFile {
public:
- explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false)
- : InputFile(ctx, ObjectKind, m, lazy) {}
+ explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false,
+ ArchiveFile *parent = nullptr)
+ : InputFile(ctx, ObjectKind, m, lazy), parent(parent) {}
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
void parse() override;
void parseLazy();
@@ -182,6 +188,9 @@ class ObjFile : public InputFile {
// True if this file was compiled with /guard:ehcont.
bool hasGuardEHCont() { return feat00Flags & 0x4000; }
+ // The archive this Obj buffer belongs to, or nullptr if it is not part of one.
+ ArchiveFile *parent;
+
// Pointer to the PDB module descriptor builder. Various debug info records
// will reference object files by "module index", which is here. Things like
// source files and section contributions are also recorded here. Will be null
@@ -369,7 +378,7 @@ class BitcodeFile : public InputFile {
public:
explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
StringRef archiveName, uint64_t offsetInArchive,
- bool lazy);
+ bool lazy = false, ArchiveFile *parent = nullptr);
~BitcodeFile();
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
ArrayRef<Symbol *> getSymbols() { return symbols; }
@@ -377,6 +386,9 @@ class BitcodeFile : public InputFile {
void parseLazy();
std::unique_ptr<llvm::lto::InputFile> obj;
+ // The archive this bitcode buffer belongs to, or nullptr if it is not part of one.
+ ArchiveFile *parent;
+
private:
void parse() override;
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 44aa506d2c35da..f570e8c211f43d 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -54,8 +54,10 @@ void SymbolTable::addFile(InputFile *file) {
if (file->lazy) {
if (auto *f = dyn_cast<BitcodeFile>(file))
f->parseLazy();
- else
- cast<ObjFile>(file)->parseLazy();
+ else if (auto *o = dyn_cast<ObjFile>(file))
+ o->parseLazy();
+ else if (auto *a = dyn_cast<ArchiveFile>(file))
+ a->parseLazy();
} else {
file->parse();
if (auto *f = dyn_cast<ObjFile>(file)) {
@@ -102,7 +104,7 @@ static void forceLazy(Symbol *s) {
}
case Symbol::Kind::LazyObjectKind: {
InputFile *file = cast<LazyObject>(s)->file;
- file->ctx.symtab.addFile(file);
+ file->ctx.driver.enqueueLazyFile(file);
break;
}
case Symbol::Kind::LazyDLLSymbolKind: {
@@ -562,6 +564,57 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
return result;
}
+static LazyIntrusiveNode *lazyNode(Symbol *s) {
+ if (auto *sym = dyn_cast<LazyArchive>(s))
+ return &sym->node;
+ if (auto *sym = dyn_cast<LazyObject>(s))
+ return &sym->node;
+ return nullptr;
+}
+
+static ArchiveFile *lazyParent(InputFile *f) {
+ if (!f)
+ return nullptr;
+ if (auto *obj = dyn_cast<ObjFile>(f))
+ return obj->parent;
+ if (auto *obj = dyn_cast<BitcodeFile>(f))
+ return obj->parent;
+ return nullptr;
+}
+
+static ArchiveFile *lazyArchive(Symbol *s) {
+ if (auto *sym = dyn_cast<LazyArchive>(s))
+ return sym->file;
+ if (auto *sym = dyn_cast<LazyObject>(s))
+ return lazyParent(sym->file);
+ return nullptr;
+}
+
+// The search behavior for undefined symbols is different when the OBJ
+// was pulled from an archive (LIB). This is documented here:
+// https://learn.microsoft.com/en-us/cpp/build/reference/link-input-files?view=msvc-170
+// "Object files on the command line are processed in the order they
+// appear on the command line. Libraries are searched in command line
+// order as well, with the following caveat: Symbols that are unresolved
+// when bringing in an object file from a library are searched for in
+// that library first, and then the following libraries from the command
+// line and /DEFAULTLIB (Specify default library) directives, and then
+// to any libraries at the beginning of the command line."
+static Symbol *searchArchiveSymbol(Symbol *s, ArchiveFile *pivot) {
+ auto &Alloc = getSpecificAllocSingleton<SymbolUnion>().Allocator;
+ Symbol *curr = s;
+ for (;;) {
+ if (lazyArchive(curr)->CmdLineIndex >= pivot->CmdLineIndex)
+ return curr;
+ uint32_t next = lazyNode(curr)->next;
+ if (!next)
+ break;
+ curr = reinterpret_cast<LazyArchive *>(
+ Alloc.fromAlignedIndex<SymbolUnion>(next));
+ }
+ return s;
+}
+
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
bool isWeakAlias) {
auto [s, wasInserted] = insert(name, f);
@@ -569,11 +622,43 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
replaceSymbol<Undefined>(s, name);
return s;
}
- if (s->isLazy())
+ if (s->isLazy()) {
+ if (ArchiveFile *parent = lazyParent(f)) {
+ Symbol *selected = searchArchiveSymbol(s, parent);
+ forceLazy(selected);
+ // Now that we have selected a symbol, we don't need the linked list of
+ // `LazyArchive`s anymore. Collapse to the selected symbol.
+ memcpy(s, selected, sizeof(SymbolUnion));
+ return s;
+ }
forceLazy(s);
+ }
return s;
}
+// This creates a linked list of archives where a specific symbol was seen.
+// We later walk that list if an undefined symbol needs to be resolved from an
+// archive OBJ.
+template <typename T, typename... ArgT>
+static void chainLazy(LazyIntrusiveNode *front, ArgT &&...arg) {
+ // Chain with symbols defined in other archives
+ Symbol *newSym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
+ newSym->canInline = true;
+ replaceSymbol<T>(newSym, std::forward<ArgT>(arg)...);
+
+ auto &Alloc = getSpecificAllocSingleton<SymbolUnion>().Allocator;
+ uint32_t index = Alloc.identifyKnownAlignedObject<SymbolUnion>(newSym);
+
+ if (!front->next)
+ front->next = index;
+ if (front->last) {
+ Symbol *last = reinterpret_cast<Symbol *>(
+ Alloc.fromAlignedIndex<SymbolUnion>(front->last));
+ lazyNode(last)->next = index;
+ }
+ front->last = index;
+}
+
void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
StringRef name = sym.getName();
auto [s, wasInserted] = insert(name);
@@ -581,6 +666,10 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
replaceSymbol<LazyArchive>(s, f, sym);
return;
}
+ if (auto *n = lazyNode(s)) {
+ chainLazy<LazyArchive>(n, f, sym);
+ return;
+ }
auto *u = dyn_cast<Undefined>(s);
if (!u || u->weakAlias || s->pendingArchiveLoad)
return;
@@ -588,19 +677,22 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
f->addMember(sym);
}
-void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
+void SymbolTable::addLazyObject(InputFile *f, StringRef name) {
assert(f->lazy);
- auto [s, wasInserted] = insert(n, f);
+ auto [s, wasInserted] = insert(name, f);
if (wasInserted) {
- replaceSymbol<LazyObject>(s, f, n);
+ replaceSymbol<LazyObject>(s, f, name);
+ return;
+ }
+ if (auto *n = lazyNode(s)) {
+ chainLazy<LazyObject>(n, f, name);
return;
}
auto *u = dyn_cast<Undefined>(s);
if (!u || u->weakAlias || s->pendingArchiveLoad)
return;
s->pendingArchiveLoad = true;
- f->lazy = false;
- addFile(f);
+ f->ctx.driver.enqueueLazyFile(f);
}
void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index ca69fb2d052706..1577406c8626cf 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -286,6 +286,15 @@ class DefinedSynthetic : public Defined {
uint32_t offset;
};
+// Keep track of symbols with the same name exposed by archives. This is
+// required to later resolve unresolved symbols in the same order as required
+// by the MSVC spec. These are indexes in the specific bump allocator for
+// SymbolUnion.
+struct LazyIntrusiveNode {
+ uint32_t next = 0;
+ uint32_t last = 0;
+};
+
// This class represents a symbol defined in an archive file. It is
// created from an archive file header, and it knows how to load an
// object file from an archive to replace itself with a defined
@@ -302,6 +311,7 @@ class LazyArchive : public Symbol {
ArchiveFile *file;
const Archive::Symbol sym;
+ LazyIntrusiveNode node;
};
class LazyObject : public Symbol {
@@ -309,6 +319,7 @@ class LazyObject : public Symbol {
LazyObject(InputFile *f, StringRef n) : Symbol(LazyObjectKind, n), file(f) {}
static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
InputFile *file;
+ LazyIntrusiveNode node;
};
// MinGW only.
diff --git a/lld/test/COFF/duplicate-imp-func.s b/lld/test/COFF/duplicate-imp-func.s
index fc0cf1ef6ae051..631c714c951f77 100644
--- a/lld/test/COFF/duplicate-imp-func.s
+++ b/lld/test/COFF/duplicate-imp-func.s
@@ -28,8 +28,10 @@
# Once the import library member from %t.lib.dll.a gets loaded, libfunc
# and __imp_libfunc already are defined.
-# Just check that this fails cleanly (doesn't crash).
-# RUN: not lld-link -lldmingw -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper.a
+# This test should now succeed since we're following the MSVC symbol searching behavior described in:
+# https://learn.microsoft.com/en-us/cpp/build/reference/link-input-files?view=msvc-170
+# In this case, the linker will select the libfunc symbol in %t.helper.a
+# RUN: lld-link -lldmingw -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper.a
# Test with %t.helper.a on the command line; in this case we won't try to
# include libfunc from %t.lib.dll.a and everything works fine.
diff --git a/lld/test/COFF/lib-searching-behavior.s b/lld/test/COFF/lib-searching-behavior.s
new file mode 100644
index 00000000000000..eb4ba55c397534
--- /dev/null
+++ b/lld/test/COFF/lib-searching-behavior.s
@@ -0,0 +1,67 @@
+# REQUIRES: x86
+
+# This test ensures that we're following the MSVC symbol searching behavior described in:
+# https://learn.microsoft.com/en-us/cpp/build/reference/link-input-files?view=msvc-170
+# "Object files on the command line are processed in the order they appear on the command line.
+# Libraries are searched in command line order as well, with the following caveat: Symbols that
+# are unresolved when bringing in an object file from a library are searched for in that library
+# first, and then the following libraries from the command line and /DEFAULTLIB (Specify default
+# library) directives, and then to any libraries at the beginning of the command line."
+
+# RUN: echo -e ".intel_syntax noprefix\n.globl libfunc\n.text\nlibfunc:\nmov eax, 1\nret\n.section .drectve\n.ascii \"/EXPORT:libfunc\"" > %t.lib.s
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc %t.lib.s -filetype=obj -o %t.lib.o
+# RUN: lld-link -dll -out:%t.lib.dll -entry:libfunc %t.lib.o -implib:%t.lib.dll.a
+
+# RUN: echo -e ".globl helper\n.text\nhelper:\ncall libfunc\nret" > %t.helper1.s
+# RUN: echo -e ".intel_syntax noprefix\n.globl libfunc\n.text\nlibfunc:\nxor eax, eax\nret" > %t.helper2.s
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc %t.helper1.s -filetype=obj -o %t.helper1.o
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc %t.helper2.s -filetype=obj -o %t.helper2.o
+
+# RUN: llvm-ar rcs %t.helper.a %t.helper1.o %t.helper2.o
+
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc %s -filetype=obj -o %t.main.o
+
+# Simulate a setup, where two libraries provide the same function;
+# %t.lib.dll.a is a pure import library which provides an import symbol "libfunc".
+# %t.helper.a is a static library which contains "helper1" and "helper2".
+#
+# helper1 contains an undefined reference to libfunc. helper2 contains an
+# implementation of libfunc.
+#
+# First %t.main.o is processed and pushes an undefined symbol 'helper'.
+# Then %t.lib.dll.a is processed and pushes the lazy archive symbol 'libfunc' in the symbol table.
+# Then comes %t.helper.a and it pushes 'helper' and 'libfunc' as lazy symbols. Then 'helper' is
+# resolved and that pushes 'libfunc' as an undefined symbol. That pulls on %t.helper.a(%t.helper2.o)
+# which contains the 'libfunc' symbol, resolving it. This is illustrative of the MSVC library searching
+# behavior which starts with the current library object which requested the unresolved symbol.
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper.a
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB %s
+
+# In this case, the symbol in %t.helper.a(%t.helper2.o) is still considered first.
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.helper.a %t.lib.dll.a
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB %s
+
+# In this test we're defining libfunc in a third library that comes after all the others. The symbol should be pulled
+# now from that third library.
+# RUN: llvm-ar rcs %t.helper1.a %t.helper1.o
+# RUN: llvm-ar rcs %t.helper2.a %t.helper2.o
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper1.a %t.helper2.a
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB %s
+
+# LIB: 140001000 <.text>:
+# LIB: 140001000: e8 03 00 00 00 callq 0x140001008 <.text+0x8>
+# LIB: 140001008: e8 03 00 00 00 callq 0x140001010 <.text+0x10>
+# LIB: 140001010: 31 c0 xorl %eax, %eax
+
+# In this last test, we should pick up the import symbol from %t.lib.dll.a since it isn't defined anywhere else.
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper1.a
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB-IMP %s
+
+# LIB-IMP: 140001000 <.text>:
+# LIB-IMP: 140001010: ff 25 22 10 00 00 jmpq *4130(%rip)
+
+ .globl main
+ .text
+main:
+ call helper
+ ret
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index c1e5c6d2853bd5..8c061b72d65f72 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -278,6 +278,32 @@ class BumpPtrAllocatorImpl
return Out / alignof(T);
}
+ /// Gets an already allocated object from an index that was previously
+ /// retrieved with `identifyKnownAlignedObject`.
+ template <typename T> T *fromAlignedIndex(int64_t Index) {
+ Index *= alignof(T);
+
+ int64_t InSlabIdx = 0;
+ for (size_t Idx = 0, E = Slabs.size(); Idx < E; Idx++) {
+ char *S = static_cast<char *>(Slabs[Idx]);
+ if (Index >= InSlabIdx &&
+ Index < InSlabIdx + static_cast<int64_t>(computeSlabSize(Idx)))
+ return reinterpret_cast<T *>(S + (Index - InSlabIdx));
+ InSlabIdx += static_cast<int64_t>(computeSlabSize(Idx));
+ }
+
+ // Use negative index to denote custom sized slabs.
+ int64_t InCustomSizedSlabIdx = -1;
+ for (size_t Idx = 0, E = CustomSizedSlabs.size(); Idx < E; Idx++) {
+ char *S = static_cast<char *>(CustomSizedSlabs[Idx].first);
+ int64_t Size = static_cast<int64_t>(CustomSizedSlabs[Idx].second);
+ if (Index <= InCustomSizedSlabIdx && Index > InCustomSizedSlabIdx - Size)
+ return reinterpret_cast<T *>(S - (Index - InCustomSizedSlabIdx));
+ InCustomSizedSlabIdx -= static_cast<int64_t>(Size);
+ }
+ return nullptr;
+ }
+
size_t getTotalMemory() const {
size_t TotalMemory = 0;
for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I)
@@ -380,9 +406,9 @@ typedef BumpPtrAllocatorImpl<> BumpPtrAllocator;
/// This allows calling the destructor in DestroyAll() and when the allocator is
/// destroyed.
template <typename T> class SpecificBumpPtrAllocator {
+public:
BumpPtrAllocator Allocator;
-public:
SpecificBumpPtrAllocator() {
// Because SpecificBumpPtrAllocator walks the memory to call destructors,
// it can't have red zones between allocations.
>From b1149e14bed66742286093dcb68399029980cdaf Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <aganea at havenstudios.com>
Date: Thu, 14 Mar 2024 14:29:25 -0400
Subject: [PATCH 2/3] Revert unneeded changes
---
lld/COFF/SymbolTable.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index f570e8c211f43d..1b0e073ac00526 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -677,15 +677,15 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
f->addMember(sym);
}
-void SymbolTable::addLazyObject(InputFile *f, StringRef name) {
+void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
assert(f->lazy);
- auto [s, wasInserted] = insert(name, f);
+ auto [s, wasInserted] = insert(n, f);
if (wasInserted) {
- replaceSymbol<LazyObject>(s, f, name);
+ replaceSymbol<LazyObject>(s, f, n);
return;
}
- if (auto *n = lazyNode(s)) {
- chainLazy<LazyObject>(n, f, name);
+ if (auto *node = lazyNode(s)) {
+ chainLazy<LazyObject>(node, f, n);
return;
}
auto *u = dyn_cast<Undefined>(s);
>From 6c2a7770dccde79e0b0b61b004cec4447d24848f Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <aganea at havenstudios.com>
Date: Thu, 14 Mar 2024 14:29:48 -0400
Subject: [PATCH 3/3] Test cmd-line libraries
---
lld/test/COFF/lib-searching-behavior.s | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/lld/test/COFF/lib-searching-behavior.s b/lld/test/COFF/lib-searching-behavior.s
index eb4ba55c397534..4ba786f015f449 100644
--- a/lld/test/COFF/lib-searching-behavior.s
+++ b/lld/test/COFF/lib-searching-behavior.s
@@ -53,13 +53,17 @@
# LIB: 140001008: e8 03 00 00 00 callq 0x140001010 <.text+0x10>
# LIB: 140001010: 31 c0 xorl %eax, %eax
-# In this last test, we should pick up the import symbol from %t.lib.dll.a since it isn't defined anywhere else.
+# Here, we should pick up the import symbol from %t.lib.dll.a since it isn't defined anywhere else.
# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper1.a
# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB-IMP %s
# LIB-IMP: 140001000 <.text>:
# LIB-IMP: 140001010: ff 25 22 10 00 00 jmpq *4130(%rip)
+# Test cmd-line archives
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a -start-lib %t.helper1.o %t.helper2.o -end-lib
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB %s
+
.globl main
.text
main:
More information about the llvm-commits
mailing list