[lld] [lld][WebAssembly] Match the ELF linker in transitioning away from archive indexes. (PR #78658)
Sam Clegg via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 19 15:46:24 PST 2024
https://github.com/sbc100 updated https://github.com/llvm/llvm-project/pull/78658
>From a66a9a74822032c066f1a44e4470dda78f3de96c Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc at chromium.org>
Date: Thu, 18 Jan 2024 10:09:48 +0000
Subject: [PATCH] [lld][WebAssembly] Match the ELF linker in transitioning away
from archive indexes.
The ELF linker transitioned away from archive indexes in
https://reviews.llvm.org/D117284.
This paves the way for supporting `--start-lib`/`--end-lib` (see #77960).
The ELF linker unified library handling with `--start-lib`/`--end-lib`
handling in https://reviews.llvm.org/D119074.
---
lld/docs/ReleaseNotes.rst | 4 ++
lld/test/wasm/archive-no-index.s | 14 ----
lld/test/wasm/bad-archive-member.s | 2 +-
lld/wasm/Driver.cpp | 31 +++++----
lld/wasm/InputFiles.cpp | 101 ++++++++++++++---------------
lld/wasm/InputFiles.h | 41 +++---------
lld/wasm/SymbolTable.cpp | 23 ++++---
lld/wasm/SymbolTable.h | 2 +-
lld/wasm/Symbols.cpp | 20 ++----
lld/wasm/Symbols.h | 15 ++---
10 files changed, 105 insertions(+), 148 deletions(-)
delete mode 100644 lld/test/wasm/archive-no-index.s
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index c322b776ff58f6..01669543cd50ca 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -50,5 +50,9 @@ MachO Improvements
WebAssembly Improvements
------------------------
+* Indexes are no longer required on archive files. Instead, symbol information
+ is read from object files within the archive. This matches the behaviour of
+ the ELF linker.
+
Fixes
#####
diff --git a/lld/test/wasm/archive-no-index.s b/lld/test/wasm/archive-no-index.s
deleted file mode 100644
index 99ca5a367d3c6d..00000000000000
--- a/lld/test/wasm/archive-no-index.s
+++ /dev/null
@@ -1,14 +0,0 @@
-# Tests error on archive file without a symbol table
-# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: llvm-as -o %t.archive.o %S/Inputs/archive1.ll
-# RUN: rm -f %t.a
-# RUN: llvm-ar crS %t.a %t.archive.o
-
-# RUN: not wasm-ld -o out.wasm %t.o %t.a 2>&1 | FileCheck %s
-
- .globl _start
-_start:
- .functype _start () -> ()
- end_function
-
-# CHECK: archive has no index; run ranlib to add one
diff --git a/lld/test/wasm/bad-archive-member.s b/lld/test/wasm/bad-archive-member.s
index 029027a8517a36..77bf16871ca5b5 100644
--- a/lld/test/wasm/bad-archive-member.s
+++ b/lld/test/wasm/bad-archive-member.s
@@ -5,7 +5,7 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux -o %t.dir/elf.o %s
# RUN: llvm-ar rcs %t.dir/libfoo.a %t.dir/elf.o
# RUN: not wasm-ld %t.dir/libfoo.a -o /dev/null 2>&1 | FileCheck %s
-# CHECK: error: unknown file type: {{.*}}libfoo.a(elf.o)
+# CHECK: warning: {{.*}}libfoo.a: archive member 'elf.o' is neither Wasm object file nor LLVM bitcode
.globl _start
_start:
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 32c042b5695a4b..6e31f0eeb08541 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -272,9 +272,11 @@ void LinkerDriver::addFile(StringRef path) {
if (fs::exists(importFile))
readImportFile(importFile.str());
+ auto members = getArchiveMembers(mbref);
+
// Handle -whole-archive.
if (inWholeArchive) {
- for (const auto &[m, offset] : getArchiveMembers(mbref)) {
+ for (const auto &[m, offset] : members) {
auto *object = createObjectFile(m, path, offset);
// Mark object as live; object members are normally not
// live by default but -whole-archive is designed to treat
@@ -289,12 +291,15 @@ void LinkerDriver::addFile(StringRef path) {
std::unique_ptr<Archive> file =
CHECK(Archive::create(mbref), path + ": failed to parse archive");
- if (!file->isEmpty() && !file->hasSymbolTable()) {
- error(mbref.getBufferIdentifier() +
- ": archive has no index; run ranlib to add one");
+ for (const auto &[m, offset] : members) {
+ auto magic = identify_magic(m.getBuffer());
+ if (magic == file_magic::wasm_object || magic == file_magic::bitcode)
+ files.push_back(createObjectFile(m, path, offset, true));
+ else
+ warn(path + ": archive member '" + m.getBufferIdentifier() +
+ "' is neither Wasm object file nor LLVM bitcode");
}
- files.push_back(make<ArchiveFile>(mbref));
return;
}
case file_magic::bitcode:
@@ -732,16 +737,10 @@ static Symbol *handleUndefined(StringRef name, const char *option) {
static void handleLibcall(StringRef name) {
Symbol *sym = symtab->find(name);
- if (!sym)
- return;
-
- if (auto *lazySym = dyn_cast<LazySymbol>(sym)) {
- MemoryBufferRef mb = lazySym->getMemberBuffer();
- if (isBitcode(mb)) {
- if (!config->whyExtract.empty())
- ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
- lazySym->extract();
- }
+ if (sym && sym->isLazy() && isa<BitcodeFile>(sym->getFile())) {
+ if (!config->whyExtract.empty())
+ ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
+ cast<LazySymbol>(sym)->extract();
}
}
@@ -767,7 +766,7 @@ static void writeWhyExtract() {
// Equivalent of demote demoteSharedAndLazySymbols() in the ELF linker
static void demoteLazySymbols() {
for (Symbol *sym : symtab->symbols()) {
- if (auto* s = dyn_cast<LazySymbol>(sym)) {
+ if (auto *s = dyn_cast<LazySymbol>(sym)) {
if (s->signature) {
LLVM_DEBUG(llvm::dbgs()
<< "demoting lazy func: " << s->getName() << "\n");
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index 19c76e49027896..f5e946aca8b2a8 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -75,7 +75,7 @@ std::optional<MemoryBufferRef> readFile(StringRef path) {
}
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
- uint64_t offsetInArchive) {
+ uint64_t offsetInArchive, bool lazy) {
file_magic magic = identify_magic(mb.getBuffer());
if (magic == file_magic::wasm_object) {
std::unique_ptr<Binary> bin =
@@ -83,18 +83,11 @@ InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
auto *obj = cast<WasmObjectFile>(bin.get());
if (obj->isSharedObject())
return make<SharedFile>(mb);
- return make<ObjFile>(mb, archiveName);
+ return make<ObjFile>(mb, archiveName, lazy);
}
- if (magic == file_magic::bitcode)
- return make<BitcodeFile>(mb, archiveName, offsetInArchive);
-
- std::string name = mb.getBufferIdentifier().str();
- if (!archiveName.empty()) {
- name = archiveName.str() + "(" + name + ")";
- }
-
- fatal("unknown file type: " + name);
+ assert(magic == file_magic::bitcode);
+ return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy);
}
// Relocations contain either symbol or type indices. This function takes a
@@ -391,9 +384,30 @@ static bool shouldMerge(const WasmSegment &seg) {
return true;
}
-void ObjFile::parse(bool ignoreComdats) {
- // Parse a memory buffer as a wasm file.
- LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
+void ObjFile::parseLazy() {
+ LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << "\n");
+ for (const SymbolRef &sym : wasmObj->symbols()) {
+ const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
+ if (!wasmSym.isDefined())
+ continue;
+ symtab->addLazy(wasmSym.Info.Name, this);
+ // addLazy() may trigger this->extract() if an existing symbol is an
+ // undefined symbol. If that happens, this function has served its purpose,
+ // and we can exit from the loop early.
+ if (!lazy)
+ break;
+ }
+}
+
+ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy)
+ : InputFile(ObjectKind, m) {
+ this->lazy = lazy;
+ this->archiveName = std::string(archiveName);
+
+ // If this isn't part of an archive, it's eagerly linked, so mark it live.
+ if (archiveName.empty())
+ markLive();
+
std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));
auto *obj = dyn_cast<WasmObjectFile>(bin.get());
@@ -406,6 +420,11 @@ void ObjFile::parse(bool ignoreComdats) {
wasmObj.reset(obj);
checkArch(obj->getArch());
+}
+
+void ObjFile::parse(bool ignoreComdats) {
+ // Parse a memory buffer as a wasm file.
+ LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");
// Build up a map of function indices to table indices for use when
// verifying the existing table index relocations
@@ -717,43 +736,6 @@ void StubFile::parse() {
}
}
-void ArchiveFile::parse() {
- // Parse a MemoryBufferRef as an archive file.
- LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
- file = CHECK(Archive::create(mb), toString(this));
-
- // Read the symbol table to construct Lazy symbols.
- int count = 0;
- for (const Archive::Symbol &sym : file->symbols()) {
- symtab->addLazy(this, &sym);
- ++count;
- }
- LLVM_DEBUG(dbgs() << "Read " << count << " symbols\n");
- (void) count;
-}
-
-void ArchiveFile::addMember(const Archive::Symbol *sym) {
- const Archive::Child &c =
- CHECK(sym->getMember(),
- "could not get the member for symbol " + sym->getName());
-
- // Don't try to load the same member twice (this can happen when members
- // mutually reference each other).
- if (!seen.insert(c.getChildOffset()).second)
- return;
-
- LLVM_DEBUG(dbgs() << "loading lazy: " << sym->getName() << "\n");
- LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
-
- MemoryBufferRef mb =
- CHECK(c.getMemoryBufferRef(),
- "could not get the buffer for the member defining symbol " +
- sym->getName());
-
- InputFile *obj = createObjectFile(mb, getName(), c.getChildOffset());
- symtab->addFile(obj, sym->getName());
-}
-
static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
switch (gvVisibility) {
case GlobalValue::DefaultVisibility:
@@ -790,8 +772,9 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
}
BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
- uint64_t offsetInArchive)
+ uint64_t offsetInArchive, bool lazy)
: InputFile(BitcodeKind, m) {
+ this->lazy = lazy;
this->archiveName = std::string(archiveName);
std::string path = mb.getBufferIdentifier().str();
@@ -817,6 +800,20 @@ BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
bool BitcodeFile::doneLTO = false;
+void BitcodeFile::parseLazy() {
+ for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
+ if (irSym.isUndefined())
+ continue;
+ StringRef name = saver().save(irSym.getName());
+ symtab->addLazy(name, this);
+ // addLazy() may trigger this->extract() if an existing symbol is an
+ // undefined symbol. If that happens, this function has served its purpose,
+ // and we can exit from the loop early.
+ if (!lazy)
+ break;
+ }
+}
+
void BitcodeFile::parse(StringRef symName) {
if (doneLTO) {
error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")");
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index d9a8b530660324..fd3d5e5ef47967 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -14,7 +14,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/LTO/LTO.h"
-#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TargetParser/Triple.h"
@@ -45,7 +44,6 @@ class InputFile {
enum Kind {
ObjectKind,
SharedKind,
- ArchiveKind,
BitcodeKind,
StubKind,
};
@@ -69,6 +67,11 @@ class InputFile {
void markLive() { live = true; }
bool isLive() const { return live; }
+ // True if this file exists in an archive file and has not yet been
+ // extracted.
+ // TODO(sbc): Use this to implement --start-lib/--end-lib.
+ bool lazy = false;
+
protected:
InputFile(Kind k, MemoryBufferRef m)
: mb(m), fileKind(k), live(!config->gcSections) {}
@@ -85,35 +88,14 @@ class InputFile {
bool live;
};
-// .a file (ar archive)
-class ArchiveFile : public InputFile {
-public:
- explicit ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {}
- static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
-
- void addMember(const llvm::object::Archive::Symbol *sym);
-
- void parse();
-
-private:
- std::unique_ptr<llvm::object::Archive> file;
- llvm::DenseSet<uint64_t> seen;
-};
-
// .o file (wasm object file)
class ObjFile : public InputFile {
public:
- explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
- : InputFile(ObjectKind, m) {
- this->archiveName = std::string(archiveName);
-
- // If this isn't part of an archive, it's eagerly linked, so mark it live.
- if (archiveName.empty())
- markLive();
- }
+ ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy = false);
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
void parse(bool ignoreComdats = false);
+ void parseLazy();
// Returns the underlying wasm file.
const WasmObjectFile *getWasmObj() const { return wasmObj.get(); }
@@ -173,10 +155,11 @@ class SharedFile : public InputFile {
class BitcodeFile : public InputFile {
public:
BitcodeFile(MemoryBufferRef m, StringRef archiveName,
- uint64_t offsetInArchive);
+ uint64_t offsetInArchive, bool lazy);
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
void parse(StringRef symName);
+ void parseLazy();
std::unique_ptr<llvm::lto::InputFile> obj;
// Set to true once LTO is complete in order prevent further bitcode objects
@@ -196,14 +179,10 @@ class StubFile : public InputFile {
llvm::DenseMap<StringRef, std::vector<StringRef>> symbolDependencies;
};
-inline bool isBitcode(MemoryBufferRef mb) {
- return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
-}
-
// Will report a fatal() error if the input buffer is not a valid bitcode
// or wasm object file.
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "",
- uint64_t offsetInArchive = 0);
+ uint64_t offsetInArchive = 0, bool lazy = false);
// Opens a given file.
std::optional<MemoryBufferRef> readFile(StringRef path);
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 9988490e14b0bc..c98aa3ee3a7a32 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -26,9 +26,13 @@ SymbolTable *symtab;
void SymbolTable::addFile(InputFile *file, StringRef symName) {
log("Processing: " + toString(file));
- // .a file
- if (auto *f = dyn_cast<ArchiveFile>(file)) {
- f->parse();
+ // Lazy object file
+ if (file->lazy) {
+ if (auto *f = dyn_cast<BitcodeFile>(file)) {
+ f->parseLazy();
+ } else {
+ cast<ObjFile>(file)->parseLazy();
+ }
return;
}
@@ -737,16 +741,15 @@ TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
return nullptr;
}
-void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
- LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n");
- StringRef name = sym->getName();
+void SymbolTable::addLazy(StringRef name, InputFile *file) {
+ LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");
Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insertName(name);
if (wasInserted) {
- replaceSymbol<LazySymbol>(s, name, 0, file, *sym);
+ replaceSymbol<LazySymbol>(s, name, 0, file);
return;
}
@@ -763,15 +766,15 @@ void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
if (auto *f = dyn_cast<UndefinedFunction>(s))
oldSig = f->signature;
LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
- auto newSym = replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK,
- file, *sym);
+ auto newSym =
+ replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file);
newSym->signature = oldSig;
return;
}
LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
const InputFile *oldFile = s->getFile();
- file->addMember(sym);
+ replaceSymbol<LazySymbol>(s, name, 0, file)->extract();
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
}
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index c5518ee23da26d..42ebb8be8eb3f8 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -83,7 +83,7 @@ class SymbolTable {
TableSymbol *resolveIndirectFunctionTable(bool required);
- void addLazy(ArchiveFile *f, const llvm::object::Archive::Symbol *sym);
+ void addLazy(StringRef name, InputFile *f);
bool addComdat(StringRef name);
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index 47d8d09ab1bd42..c67a054e4636bb 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -13,6 +13,7 @@
#include "InputFiles.h"
#include "OutputSections.h"
#include "OutputSegment.h"
+#include "SymbolTable.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/Demangle/Demangle.h"
@@ -61,8 +62,8 @@ std::string toString(wasm::Symbol::Kind kind) {
return "UndefinedTable";
case wasm::Symbol::UndefinedTagKind:
return "UndefinedTag";
- case wasm::Symbol::LazyKind:
- return "LazyKind";
+ case wasm::Symbol::LazySymbolKind:
+ return "LazySymbolKind";
case wasm::Symbol::SectionKind:
return "SectionKind";
case wasm::Symbol::OutputSectionKind:
@@ -426,23 +427,16 @@ const OutputSectionSymbol *SectionSymbol::getOutputSectionSymbol() const {
}
void LazySymbol::extract() {
- cast<ArchiveFile>(file)->addMember(&archiveSymbol);
+ if (file->lazy) {
+ file->lazy = false;
+ symtab->addFile(file, name);
+ }
}
void LazySymbol::setWeak() {
flags |= (flags & ~WASM_SYMBOL_BINDING_MASK) | WASM_SYMBOL_BINDING_WEAK;
}
-MemoryBufferRef LazySymbol::getMemberBuffer() {
- Archive::Child c =
- CHECK(archiveSymbol.getMember(),
- "could not get the member for symbol " + toString(*this));
-
- return CHECK(c.getMemoryBufferRef(),
- "could not get the buffer for the member defining symbol " +
- toString(*this));
-}
-
void printTraceSymbolUndefined(StringRef name, const InputFile* file) {
message(toString(file) + ": reference to " + name);
}
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index 69ebfdb5bb356e..7a12c7e36958d0 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -59,7 +59,7 @@ class Symbol {
UndefinedGlobalKind,
UndefinedTableKind,
UndefinedTagKind,
- LazyKind,
+ LazySymbolKind,
};
Kind kind() const { return symbolKind; }
@@ -73,7 +73,7 @@ class Symbol {
symbolKind == UndefinedTableKind || symbolKind == UndefinedTagKind;
}
- bool isLazy() const { return symbolKind == LazyKind; }
+ bool isLazy() const { return symbolKind == LazySymbolKind; }
bool isLocal() const;
bool isWeak() const;
@@ -497,14 +497,12 @@ class UndefinedTag : public TagSymbol {
// symbols into consideration.
class LazySymbol : public Symbol {
public:
- LazySymbol(StringRef name, uint32_t flags, InputFile *file,
- const llvm::object::Archive::Symbol &sym)
- : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {}
+ LazySymbol(StringRef name, uint32_t flags, InputFile *file)
+ : Symbol(name, LazySymbolKind, flags, file) {}
- static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
+ static bool classof(const Symbol *s) { return s->kind() == LazySymbolKind; }
void extract();
void setWeak();
- MemoryBufferRef getMemberBuffer();
// Lazy symbols can have a signature because they can replace an
// UndefinedFunction in which case we need to be able to preserve the
@@ -512,9 +510,6 @@ class LazySymbol : public Symbol {
// TODO(sbc): This repetition of the signature field is inelegant. Revisit
// the use of class hierarchy to represent symbol taxonomy.
const WasmSignature *signature = nullptr;
-
-private:
- llvm::object::Archive::Symbol archiveSymbol;
};
// linker-generated symbols
More information about the llvm-commits
mailing list