[lld] [llvm] [LLD][COFF] Make unresolved symbol search behavior compliant with MSVC link.exe (PR #85290)

Alexandre Ganea via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 14 11:30:17 PDT 2024


https://github.com/aganea updated https://github.com/llvm/llvm-project/pull/85290

>From d5c4adfb5e85e80758cb51a91a246630924c7472 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <aganea at havenstudios.com>
Date: Thu, 14 Mar 2024 13:31:51 -0400
Subject: [PATCH 1/3] [LLD][COFF] Align unresolved symbols search behavior with
 MSVC link.exe

---
 lld/COFF/Driver.cpp                    |  53 +++++++++---
 lld/COFF/Driver.h                      |  21 +++--
 lld/COFF/InputFiles.cpp                |  13 +--
 lld/COFF/InputFiles.h                  |  26 ++++--
 lld/COFF/SymbolTable.cpp               | 110 +++++++++++++++++++++++--
 lld/COFF/Symbols.h                     |  11 +++
 lld/test/COFF/duplicate-imp-func.s     |   6 +-
 lld/test/COFF/lib-searching-behavior.s |  67 +++++++++++++++
 llvm/include/llvm/Support/Allocator.h  |  28 ++++++-
 9 files changed, 292 insertions(+), 43 deletions(-)
 create mode 100644 lld/test/COFF/lib-searching-behavior.s

diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 22ee2f133be98a..38e0392a876307 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -187,7 +187,8 @@ MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> mb) {
 }
 
 void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
-                             bool wholeArchive, bool lazy) {
+                             bool wholeArchive, bool lazy,
+                             ArchiveFile *parent) {
   StringRef filename = mb->getBufferIdentifier();
 
   MemoryBufferRef mbref = takeBuffer(std::move(mb));
@@ -213,11 +214,11 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
     ctx.symtab.addFile(make<ArchiveFile>(ctx, mbref));
     break;
   case file_magic::bitcode:
-    ctx.symtab.addFile(make<BitcodeFile>(ctx, mbref, "", 0, lazy));
+    ctx.symtab.addFile(make<BitcodeFile>(ctx, mbref, "", 0, lazy, parent));
     break;
   case file_magic::coff_object:
   case file_magic::coff_import_library:
-    ctx.symtab.addFile(make<ObjFile>(ctx, mbref, lazy));
+    ctx.symtab.addFile(make<ObjFile>(ctx, mbref, lazy, parent));
     break;
   case file_magic::pdb:
     ctx.symtab.addFile(make<PDBInputFile>(ctx, mbref));
@@ -242,7 +243,9 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
   }
 }
 
-void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) {
+void LinkerDriver::enqueuePath(
+    StringRef path, bool wholeArchive, bool lazy,
+    std::optional<std::shared_future<ArchiveFile *>> parent) {
   auto future = std::make_shared<std::future<MBErrPair>>(
       createFutureForFile(std::string(path)));
   std::string pathStr = std::string(path);
@@ -281,13 +284,15 @@ void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) {
       else
         error(msg + "; did you mean '" + nearest + "'");
     } else
-      ctx.driver.addBuffer(std::move(mb), wholeArchive, lazy);
+      ctx.driver.addBuffer(std::move(mb), wholeArchive, lazy,
+                           parent ? parent->get() : nullptr);
   });
 }
 
 void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName,
                                     StringRef parentName,
-                                    uint64_t offsetInArchive) {
+                                    uint64_t offsetInArchive,
+                                    ArchiveFile *parent) {
   file_magic magic = identify_magic(mb.getBuffer());
   if (magic == file_magic::coff_import_library) {
     InputFile *imp = make<ImportFile>(ctx, mb);
@@ -298,10 +303,10 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName,
 
   InputFile *obj;
   if (magic == file_magic::coff_object) {
-    obj = make<ObjFile>(ctx, mb);
+    obj = make<ObjFile>(ctx, mb, /*lazy=*/false, parent);
   } else if (magic == file_magic::bitcode) {
-    obj =
-        make<BitcodeFile>(ctx, mb, parentName, offsetInArchive, /*lazy=*/false);
+    obj = make<BitcodeFile>(ctx, mb, parentName, offsetInArchive,
+                            /*lazy=*/false, parent);
   } else if (magic == file_magic::coff_cl_gl_object) {
     error(mb.getBufferIdentifier() +
           ": is not a native COFF file. Recompile without /GL?");
@@ -318,7 +323,8 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName,
 
 void LinkerDriver::enqueueArchiveMember(const Archive::Child &c,
                                         const Archive::Symbol &sym,
-                                        StringRef parentName) {
+                                        StringRef parentName,
+                                        ArchiveFile *parent) {
 
   auto reportBufferError = [=](Error &&e, StringRef childName) {
     fatal("could not get the buffer for the member defining symbol " +
@@ -335,7 +341,7 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c,
     enqueueTask([=]() {
       llvm::TimeTraceScope timeScope("Archive: ", mb.getBufferIdentifier());
       ctx.driver.addArchiveBuffer(mb, toCOFFString(ctx, sym), parentName,
-                                  offsetInArchive);
+                                  offsetInArchive, parent);
     });
     return;
   }
@@ -356,7 +362,15 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c,
     // used as the buffer identifier.
     ctx.driver.addArchiveBuffer(takeBuffer(std::move(mbOrErr.first)),
                                 toCOFFString(ctx, sym), "",
-                                /*OffsetInArchive=*/0);
+                                /*OffsetInArchive=*/0, parent);
+  });
+}
+
+void LinkerDriver::enqueueLazyFile(InputFile *file) {
+  enqueueTask([=]() {
+    // Once it has been enqueued, it cannot be lazy anymore.
+    file->lazy = false;
+    ctx.symtab.addFile(file);
   });
 }
 
@@ -2111,17 +2125,30 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   {
     llvm::TimeTraceScope timeScope2("Parse & queue inputs");
     bool inLib = false;
+    std::optional<std::shared_future<ArchiveFile *>> inLibArchive;
     for (auto *arg : args) {
       switch (arg->getOption().getID()) {
       case OPT_end_lib:
         if (!inLib)
           error("stray " + arg->getSpelling());
         inLib = false;
+        inLibArchive = std::nullopt;
         break;
       case OPT_start_lib:
         if (inLib)
           error("nested " + arg->getSpelling());
         inLib = true;
+        // It is important to create a fake archive here so that we remember its
+        // placement on the command-line. This will be later needed to resolve
+        // symbols in the archive order required by the MSVC specification.
+        {
+          auto a = std::make_shared<std::promise<ArchiveFile *>>();
+          inLibArchive = a->get_future().share();
+          enqueueTask([=] {
+            a->set_value(
+                make<ArchiveFile>(ctx, MemoryBufferRef({}, "<cmdline-lib>")));
+          });
+        }
         break;
       case OPT_wholearchive_file:
         if (std::optional<StringRef> path = findFileIfNew(arg->getValue()))
@@ -2129,7 +2156,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
         break;
       case OPT_INPUT:
         if (std::optional<StringRef> path = findFileIfNew(arg->getValue()))
-          enqueuePath(*path, isWholeArchive(*path), inLib);
+          enqueuePath(*path, isWholeArchive(*path), inLib, inLibArchive);
         break;
       default:
         // Ignore other options.
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index fa54de05befb58..da3c41e1bca734 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -22,6 +22,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/TarWriter.h"
 #include "llvm/WindowsDriver/MSVCPaths.h"
+#include <future>
 #include <memory>
 #include <optional>
 #include <set>
@@ -91,13 +92,20 @@ class LinkerDriver {
 
   // Used by ArchiveFile to enqueue members.
   void enqueueArchiveMember(const Archive::Child &c, const Archive::Symbol &sym,
-                            StringRef parentName);
+                            StringRef parentName,
+                            ArchiveFile *parent = nullptr);
 
-  void enqueuePDB(StringRef Path) { enqueuePath(Path, false, false); }
+  void enqueuePDB(StringRef Path) {
+    enqueuePath(Path, false, false, /*parent=*/std::nullopt);
+  }
 
   MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> mb);
 
-  void enqueuePath(StringRef path, bool wholeArchive, bool lazy);
+  void enqueuePath(
+      StringRef path, bool wholeArchive, bool lazy,
+      std::optional<std::shared_future<ArchiveFile *>> parent = std::nullopt);
+
+  void enqueueLazyFile(InputFile *file);
 
   std::unique_ptr<llvm::TarWriter> tar; // for /linkrepro
 
@@ -182,10 +190,11 @@ class LinkerDriver {
   StringRef findDefaultEntry();
   WindowsSubsystem inferSubsystem();
 
-  void addBuffer(std::unique_ptr<MemoryBuffer> mb, bool wholeArchive,
-                 bool lazy);
+  void addBuffer(std::unique_ptr<MemoryBuffer> mb, bool wholeArchive, bool lazy,
+                 ArchiveFile *parent = nullptr);
   void addArchiveBuffer(MemoryBufferRef mbref, StringRef symName,
-                        StringRef parentName, uint64_t offsetInArchive);
+                        StringRef parentName, uint64_t offsetInArchive,
+                        ArchiveFile *parent = nullptr);
 
   void enqueueTask(std::function<void()> task);
   bool run();
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 037fae45242c6f..42cdd1cf3b6c2c 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -94,9 +94,12 @@ static bool ignoredSymbolName(StringRef name) {
 }
 
 ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
-    : InputFile(ctx, ArchiveKind, m) {}
+    : InputFile(ctx, ArchiveKind, m, /*lazy=*/true) {
+  static unsigned Order = 0;
+  CmdLineIndex = Order++;
+}
 
-void ArchiveFile::parse() {
+void ArchiveFile::parseLazy() {
   // Parse a MemoryBufferRef as an archive file.
   file = CHECK(Archive::create(mb), this);
 
@@ -115,7 +118,7 @@ void ArchiveFile::addMember(const Archive::Symbol &sym) {
   if (!seen.insert(c.getChildOffset()).second)
     return;
 
-  ctx.driver.enqueueArchiveMember(c, sym, getName());
+  ctx.driver.enqueueArchiveMember(c, sym, getName(), this);
 }
 
 std::vector<MemoryBufferRef> lld::coff::getArchiveMembers(Archive *file) {
@@ -1000,8 +1003,8 @@ void ImportFile::parse() {
 
 BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
                          StringRef archiveName, uint64_t offsetInArchive,
-                         bool lazy)
-    : InputFile(ctx, BitcodeKind, mb, lazy) {
+                         bool lazy, ArchiveFile *parent)
+    : InputFile(ctx, BitcodeKind, mb, lazy), parent(parent) {
   std::string path = mb.getBufferIdentifier().str();
   if (ctx.config.thinLTOIndexOnly)
     path = replaceThinLTOSuffix(mb.getBufferIdentifier(),
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 3b55cd791bfda2..7070f51fdf78ac 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -66,7 +66,6 @@ class InputFile {
   enum Kind {
     ArchiveKind,
     ObjectKind,
-    LazyObjectKind,
     PDBKind,
     ImportKind,
     BitcodeKind,
@@ -105,7 +104,7 @@ class InputFile {
 
 public:
   // True if this is a lazy ObjFile or BitcodeFile.
-  bool lazy = false;
+  bool lazy;
 };
 
 // .lib or .a file.
@@ -113,23 +112,30 @@ class ArchiveFile : public InputFile {
 public:
   explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m);
   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
-  void parse() override;
+  void parse() override{};
+  void parseLazy();
 
   // Enqueues an archive member load for the given symbol. If we've already
   // enqueued a load for the same archive member, this function does nothing,
   // which ensures that we don't load the same member more than once.
   void addMember(const Archive::Symbol &sym);
 
-private:
   std::unique_ptr<Archive> file;
+
+  // The order this archive was seen on the cmd-line. This is later needed for
+  // resolving undefined symbols in archive OBJs.
+  uint32_t CmdLineIndex;
+
+private:
   llvm::DenseSet<uint64_t> seen;
 };
 
 // .obj or .o file. This may be a member of an archive file.
 class ObjFile : public InputFile {
 public:
-  explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false)
-      : InputFile(ctx, ObjectKind, m, lazy) {}
+  explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false,
+                   ArchiveFile *parent = nullptr)
+      : InputFile(ctx, ObjectKind, m, lazy), parent(parent) {}
   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
   void parse() override;
   void parseLazy();
@@ -182,6 +188,9 @@ class ObjFile : public InputFile {
   // True if this file was compiled with /guard:ehcont.
   bool hasGuardEHCont() { return feat00Flags & 0x4000; }
 
+  // The archive this Obj buffer is part of, or nullptr if it has none.
+  ArchiveFile *parent;
+
   // Pointer to the PDB module descriptor builder. Various debug info records
   // will reference object files by "module index", which is here. Things like
   // source files and section contributions are also recorded here. Will be null
@@ -369,7 +378,7 @@ class BitcodeFile : public InputFile {
 public:
   explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
                        StringRef archiveName, uint64_t offsetInArchive,
-                       bool lazy);
+                       bool lazy = false, ArchiveFile *parent = nullptr);
   ~BitcodeFile();
   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
   ArrayRef<Symbol *> getSymbols() { return symbols; }
@@ -377,6 +386,9 @@ class BitcodeFile : public InputFile {
   void parseLazy();
   std::unique_ptr<llvm::lto::InputFile> obj;
 
+  // The archive this bitcode buffer is part of, or nullptr if it has none.
+  ArchiveFile *parent;
+
 private:
   void parse() override;
 
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 44aa506d2c35da..f570e8c211f43d 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -54,8 +54,10 @@ void SymbolTable::addFile(InputFile *file) {
   if (file->lazy) {
     if (auto *f = dyn_cast<BitcodeFile>(file))
       f->parseLazy();
-    else
-      cast<ObjFile>(file)->parseLazy();
+    else if (auto *o = dyn_cast<ObjFile>(file))
+      o->parseLazy();
+    else if (auto *a = dyn_cast<ArchiveFile>(file))
+      a->parseLazy();
   } else {
     file->parse();
     if (auto *f = dyn_cast<ObjFile>(file)) {
@@ -102,7 +104,7 @@ static void forceLazy(Symbol *s) {
   }
   case Symbol::Kind::LazyObjectKind: {
     InputFile *file = cast<LazyObject>(s)->file;
-    file->ctx.symtab.addFile(file);
+    file->ctx.driver.enqueueLazyFile(file);
     break;
   }
   case Symbol::Kind::LazyDLLSymbolKind: {
@@ -562,6 +564,57 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
   return result;
 }
 
+static LazyIntrusiveNode *lazyNode(Symbol *s) {
+  if (auto *sym = dyn_cast<LazyArchive>(s))
+    return &sym->node;
+  if (auto *sym = dyn_cast<LazyObject>(s))
+    return &sym->node;
+  return nullptr;
+}
+
+static ArchiveFile *lazyParent(InputFile *f) {
+  if (!f)
+    return nullptr;
+  if (auto *obj = dyn_cast<ObjFile>(f))
+    return obj->parent;
+  if (auto *obj = dyn_cast<BitcodeFile>(f))
+    return obj->parent;
+  return nullptr;
+}
+
+static ArchiveFile *lazyArchive(Symbol *s) {
+  if (auto *sym = dyn_cast<LazyArchive>(s))
+    return sym->file;
+  if (auto *sym = dyn_cast<LazyObject>(s))
+    return lazyParent(sym->file);
+  return nullptr;
+}
+
+// The search behavior for undefined symbols is different when the OBJ
+// was pulled from an archive (LIB). This is documented here:
+// https://learn.microsoft.com/en-us/cpp/build/reference/link-input-files?view=msvc-170
+// "Object files on the command line are processed in the order they
+// appear on the command line. Libraries are searched in command line
+// order as well, with the following caveat: Symbols that are unresolved
+// when bringing in an object file from a library are searched for in
+// that library first, and then the following libraries from the command
+// line and /DEFAULTLIB (Specify default library) directives, and then
+// to any libraries at the beginning of the command line."
+static Symbol *searchArchiveSymbol(Symbol *s, ArchiveFile *pivot) {
+  auto &Alloc = getSpecificAllocSingleton<SymbolUnion>().Allocator;
+  Symbol *curr = s;
+  for (;;) {
+    if (lazyArchive(curr)->CmdLineIndex >= pivot->CmdLineIndex)
+      return curr;
+    uint32_t next = lazyNode(curr)->next;
+    if (!next)
+      break;
+    curr = reinterpret_cast<LazyArchive *>(
+        Alloc.fromAlignedIndex<SymbolUnion>(next));
+  }
+  return s;
+}
+
 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
                                   bool isWeakAlias) {
   auto [s, wasInserted] = insert(name, f);
@@ -569,11 +622,43 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
     replaceSymbol<Undefined>(s, name);
     return s;
   }
-  if (s->isLazy())
+  if (s->isLazy()) {
+    if (ArchiveFile *parent = lazyParent(f)) {
+      Symbol *selected = searchArchiveSymbol(s, parent);
+      forceLazy(selected);
+      // Now that we have selected a symbol, we don't need the linked list of
+      // `LazyArchive`s anymore. Collapse to the selected symbol.
+      memcpy(s, selected, sizeof(SymbolUnion));
+      return s;
+    }
     forceLazy(s);
+  }
   return s;
 }
 
+// This creates a linked list of archives where a specific symbol was seen.
+// We later walk that list if an undefined symbol needs to be resolved from an
+// archive OBJ.
+template <typename T, typename... ArgT>
+static void chainLazy(LazyIntrusiveNode *front, ArgT &&...arg) {
+  // Chain with symbols defined in other archives
+  Symbol *newSym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
+  newSym->canInline = true;
+  replaceSymbol<T>(newSym, std::forward<ArgT>(arg)...);
+
+  auto &Alloc = getSpecificAllocSingleton<SymbolUnion>().Allocator;
+  uint32_t index = Alloc.identifyKnownAlignedObject<SymbolUnion>(newSym);
+
+  if (!front->next)
+    front->next = index;
+  if (front->last) {
+    Symbol *last = reinterpret_cast<Symbol *>(
+        Alloc.fromAlignedIndex<SymbolUnion>(front->last));
+    lazyNode(last)->next = index;
+  }
+  front->last = index;
+}
+
 void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
   StringRef name = sym.getName();
   auto [s, wasInserted] = insert(name);
@@ -581,6 +666,10 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
     replaceSymbol<LazyArchive>(s, f, sym);
     return;
   }
+  if (auto *n = lazyNode(s)) {
+    chainLazy<LazyArchive>(n, f, sym);
+    return;
+  }
   auto *u = dyn_cast<Undefined>(s);
   if (!u || u->weakAlias || s->pendingArchiveLoad)
     return;
@@ -588,19 +677,22 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
   f->addMember(sym);
 }
 
-void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
+void SymbolTable::addLazyObject(InputFile *f, StringRef name) {
   assert(f->lazy);
-  auto [s, wasInserted] = insert(n, f);
+  auto [s, wasInserted] = insert(name, f);
   if (wasInserted) {
-    replaceSymbol<LazyObject>(s, f, n);
+    replaceSymbol<LazyObject>(s, f, name);
+    return;
+  }
+  if (auto *n = lazyNode(s)) {
+    chainLazy<LazyObject>(n, f, name);
     return;
   }
   auto *u = dyn_cast<Undefined>(s);
   if (!u || u->weakAlias || s->pendingArchiveLoad)
     return;
   s->pendingArchiveLoad = true;
-  f->lazy = false;
-  addFile(f);
+  f->ctx.driver.enqueueLazyFile(f);
 }
 
 void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym,
diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index ca69fb2d052706..1577406c8626cf 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -286,6 +286,15 @@ class DefinedSynthetic : public Defined {
   uint32_t offset;
 };
 
+// Keep track of symbols with the same name exposed by archives. This is
+// required to later resolve unresolved symbols in the same order as required
+// by the MSVC spec. These are indexes in the specific bump allocator for
+// SymbolUnion.
+struct LazyIntrusiveNode {
+  uint32_t next = 0;
+  uint32_t last = 0;
+};
+
 // This class represents a symbol defined in an archive file. It is
 // created from an archive file header, and it knows how to load an
 // object file from an archive to replace itself with a defined
@@ -302,6 +311,7 @@ class LazyArchive : public Symbol {
 
   ArchiveFile *file;
   const Archive::Symbol sym;
+  LazyIntrusiveNode node;
 };
 
 class LazyObject : public Symbol {
@@ -309,6 +319,7 @@ class LazyObject : public Symbol {
   LazyObject(InputFile *f, StringRef n) : Symbol(LazyObjectKind, n), file(f) {}
   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
   InputFile *file;
+  LazyIntrusiveNode node;
 };
 
 // MinGW only.
diff --git a/lld/test/COFF/duplicate-imp-func.s b/lld/test/COFF/duplicate-imp-func.s
index fc0cf1ef6ae051..631c714c951f77 100644
--- a/lld/test/COFF/duplicate-imp-func.s
+++ b/lld/test/COFF/duplicate-imp-func.s
@@ -28,8 +28,10 @@
 # Once the import library member from %t.lib.dll.a gets loaded, libfunc
 # and __imp_libfunc already are defined.
 
-# Just check that this fails cleanly (doesn't crash).
-# RUN: not lld-link -lldmingw -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper.a
+# This test should now succeed since we're following the MSVC symbol searching behavior described in:
+# https://learn.microsoft.com/en-us/cpp/build/reference/link-input-files?view=msvc-170
+# In this case, the linker will select the libfunc symbol in %t.helper.a
+# RUN: lld-link -lldmingw -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper.a
 
 # Test with %t.helper.a on the command line; in this case we won't try to
 # include libfunc from %t.lib.dll.a and everything works fine.
diff --git a/lld/test/COFF/lib-searching-behavior.s b/lld/test/COFF/lib-searching-behavior.s
new file mode 100644
index 00000000000000..eb4ba55c397534
--- /dev/null
+++ b/lld/test/COFF/lib-searching-behavior.s
@@ -0,0 +1,67 @@
+# REQUIRES: x86
+
+# This test ensures that we're following the MSVC symbol searching behavior described in:
+# https://learn.microsoft.com/en-us/cpp/build/reference/link-input-files?view=msvc-170
+# "Object files on the command line are processed in the order they appear on the command line.
+# Libraries are searched in command line order as well, with the following caveat: Symbols that
+# are unresolved when bringing in an object file from a library are searched for in that library
+# first, and then the following libraries from the command line and /DEFAULTLIB (Specify default
+# library) directives, and then to any libraries at the beginning of the command line."
+
+# RUN: echo -e ".intel_syntax noprefix\n.globl libfunc\n.text\nlibfunc:\nmov eax, 1\nret\n.section .drectve\n.ascii \"/EXPORT:libfunc\"" > %t.lib.s
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc %t.lib.s -filetype=obj -o %t.lib.o
+# RUN: lld-link -dll -out:%t.lib.dll -entry:libfunc %t.lib.o -implib:%t.lib.dll.a
+
+# RUN: echo -e ".globl helper\n.text\nhelper:\ncall libfunc\nret" > %t.helper1.s
+# RUN: echo -e ".intel_syntax noprefix\n.globl libfunc\n.text\nlibfunc:\nxor eax, eax\nret" > %t.helper2.s
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc %t.helper1.s -filetype=obj -o %t.helper1.o
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc %t.helper2.s -filetype=obj -o %t.helper2.o
+
+# RUN: llvm-ar rcs %t.helper.a %t.helper1.o %t.helper2.o
+
+# RUN: llvm-mc -triple=x86_64-pc-windows-msvc %s -filetype=obj -o %t.main.o
+
+# Simulate a setup, where two libraries provide the same function;
+# %t.lib.dll.a is a pure import library which provides an import symbol "libfunc".
+# %t.helper.a is a static library which contains "helper1" and "helper2".
+#
+# helper1 contains an undefined reference to libfunc. helper2 contains an
+# implementation of libfunc.
+#
+# First %t.main.o is processed and pushes an undefined symbol 'helper'.
+# Then %t.lib.dll.a is processed and pushes the lazy archive symbol 'libfunc' in the symbol table.
+# Then comes %t.helper.a and it pushes 'helper' and 'libfunc' as lazy symbols. Then 'helper' is
+# resolved and that pushes 'libfunc' as an undefined symbol. That pulls in %t.helper.a(%t.helper2.o)
+# which contains the 'libfunc' symbol, resolving it. This is illustrative of the MSVC library searching
+# behavior which starts with the current library object which requested the unresolved symbol.
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper.a
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB %s
+
+# In this case, the symbol in %t.helper.a(%t.helper2.o) is still considered first.
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.helper.a %t.lib.dll.a
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB %s
+
+# In this test we're defining libfunc in a third library that comes after all the others. The symbol should be pulled
+# now from that third library.
+# RUN: llvm-ar rcs %t.helper1.a %t.helper1.o
+# RUN: llvm-ar rcs %t.helper2.a %t.helper2.o
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper1.a %t.helper2.a
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB %s
+
+# LIB: 140001000 <.text>:
+# LIB: 140001000: e8 03 00 00 00                   callq   0x140001008 <.text+0x8>
+# LIB: 140001008: e8 03 00 00 00                   callq   0x140001010 <.text+0x10>
+# LIB: 140001010: 31 c0                            xorl    %eax, %eax
+
+# In this last test, we should pick up the import symbol from %t.lib.dll.a since it isn't defined anywhere else.
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper1.a
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB-IMP %s
+
+# LIB-IMP: 140001000 <.text>:
+# LIB-IMP: 140001010: ff 25 22 10 00 00            jmpq    *4130(%rip)
+
+    .globl main
+    .text
+main:
+    call helper
+    ret
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index c1e5c6d2853bd5..8c061b72d65f72 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -278,6 +278,32 @@ class BumpPtrAllocatorImpl
     return Out / alignof(T);
   }
 
+  /// Gets an already allocated object from an index that was previously
+  /// retrieved with `identifyKnownAlignedObject`.
+  template <typename T> T *fromAlignedIndex(int64_t Index) {
+    Index *= alignof(T);
+
+    int64_t InSlabIdx = 0;
+    for (size_t Idx = 0, E = Slabs.size(); Idx < E; Idx++) {
+      char *S = static_cast<char *>(Slabs[Idx]);
+      if (Index >= InSlabIdx &&
+          Index < InSlabIdx + static_cast<int64_t>(computeSlabSize(Idx)))
+        return reinterpret_cast<T *>(S + (Index - InSlabIdx));
+      InSlabIdx += static_cast<int64_t>(computeSlabSize(Idx));
+    }
+
+    // Use negative index to denote custom sized slabs.
+    int64_t InCustomSizedSlabIdx = -1;
+    for (size_t Idx = 0, E = CustomSizedSlabs.size(); Idx < E; Idx++) {
+      char *S = static_cast<char *>(CustomSizedSlabs[Idx].first);
+      int64_t Size = static_cast<int64_t>(CustomSizedSlabs[Idx].second);
+      if (Index <= InCustomSizedSlabIdx && Index > InCustomSizedSlabIdx - Size)
+        return reinterpret_cast<T *>(S - (Index - InCustomSizedSlabIdx));
+      InCustomSizedSlabIdx -= static_cast<int64_t>(Size);
+    }
+    return nullptr;
+  }
+
   size_t getTotalMemory() const {
     size_t TotalMemory = 0;
     for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I)
@@ -380,9 +406,9 @@ typedef BumpPtrAllocatorImpl<> BumpPtrAllocator;
 /// This allows calling the destructor in DestroyAll() and when the allocator is
 /// destroyed.
 template <typename T> class SpecificBumpPtrAllocator {
+public:
   BumpPtrAllocator Allocator;
 
-public:
   SpecificBumpPtrAllocator() {
     // Because SpecificBumpPtrAllocator walks the memory to call destructors,
     // it can't have red zones between allocations.

>From b1149e14bed66742286093dcb68399029980cdaf Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <aganea at havenstudios.com>
Date: Thu, 14 Mar 2024 14:29:25 -0400
Subject: [PATCH 2/3] Revert unneeded changes

---
 lld/COFF/SymbolTable.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index f570e8c211f43d..1b0e073ac00526 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -677,15 +677,15 @@ void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) {
   f->addMember(sym);
 }
 
-void SymbolTable::addLazyObject(InputFile *f, StringRef name) {
+void SymbolTable::addLazyObject(InputFile *f, StringRef n) {
   assert(f->lazy);
-  auto [s, wasInserted] = insert(name, f);
+  auto [s, wasInserted] = insert(n, f);
   if (wasInserted) {
-    replaceSymbol<LazyObject>(s, f, name);
+    replaceSymbol<LazyObject>(s, f, n);
     return;
   }
-  if (auto *n = lazyNode(s)) {
-    chainLazy<LazyObject>(n, f, name);
+  if (auto *node = lazyNode(s)) {
+    chainLazy<LazyObject>(node, f, n);
     return;
   }
   auto *u = dyn_cast<Undefined>(s);

>From 6c2a7770dccde79e0b0b61b004cec4447d24848f Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <aganea at havenstudios.com>
Date: Thu, 14 Mar 2024 14:29:48 -0400
Subject: [PATCH 3/3] Test cmd-line libraries

---
 lld/test/COFF/lib-searching-behavior.s | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lld/test/COFF/lib-searching-behavior.s b/lld/test/COFF/lib-searching-behavior.s
index eb4ba55c397534..4ba786f015f449 100644
--- a/lld/test/COFF/lib-searching-behavior.s
+++ b/lld/test/COFF/lib-searching-behavior.s
@@ -53,13 +53,17 @@
 # LIB: 140001008: e8 03 00 00 00                   callq   0x140001010 <.text+0x10>
 # LIB: 140001010: 31 c0                            xorl    %eax, %eax
 
-# In this last test, we should pick up the import symbol from %t.lib.dll.a since it isn't defined anywhere else.
+# Here, we should pick up the import symbol from %t.lib.dll.a since it isn't defined anywhere else.
 # RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a %t.helper1.a
 # RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB-IMP %s
 
 # LIB-IMP: 140001000 <.text>:
 # LIB-IMP: 140001010: ff 25 22 10 00 00            jmpq    *4130(%rip)
 
+# Test cmd-line archives
+# RUN: lld-link -out:%t.main.exe -entry:main %t.main.o %t.lib.dll.a -start-lib %t.helper1.o %t.helper2.o -end-lib
+# RUN: llvm-objdump --no-print-imm-hex -d %t.main.exe | FileCheck --check-prefix=LIB %s
+
     .globl main
     .text
 main:



More information about the llvm-commits mailing list