[lld] 950ae43 - [WebAssembly] GC constructor functions in otherwise unused archive objects
Dan Gohman via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 12 18:55:27 PDT 2020
Author: Dan Gohman
Date: 2020-10-12T18:54:57-07:00
New Revision: 950ae43091121ea357f735790f4042264cf40728
URL: https://github.com/llvm/llvm-project/commit/950ae43091121ea357f735790f4042264cf40728
DIFF: https://github.com/llvm/llvm-project/commit/950ae43091121ea357f735790f4042264cf40728.diff
LOG: [WebAssembly] GC constructor functions in otherwise unused archive objects
This allows `__wasilibc_populate_libpreopen` to be GC'd in more cases
where it isn't needed, including when linked from Rust's libstd.
Differential Revision: https://reviews.llvm.org/D85062
Added:
lld/test/wasm/Inputs/ctor-ctor.s
lld/test/wasm/Inputs/ctor-lib.s
lld/test/wasm/Inputs/ctor-setup-call-def.s
lld/test/wasm/Inputs/ctor-setup.s
lld/test/wasm/Inputs/ctor-start.s
lld/test/wasm/ctor-gc-setup.test
lld/test/wasm/ctor-gc.test
lld/test/wasm/ctor-no-gc.test
Modified:
lld/wasm/InputFiles.h
lld/wasm/MarkLive.cpp
lld/wasm/Symbols.cpp
lld/wasm/Writer.cpp
Removed:
################################################################################
diff --git a/lld/test/wasm/Inputs/ctor-ctor.s b/lld/test/wasm/Inputs/ctor-ctor.s
new file mode 100644
index 000000000000..cf6218238c5f
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-ctor.s
@@ -0,0 +1,15 @@
+ .section .text.def,"",@
+ .globl def
+def:
+ .functype def () -> ()
+ end_function
+
+ .section .text.test_ctor,"",@
+ .globl test_ctor
+test_ctor:
+ .functype test_ctor () -> ()
+ end_function
+
+ .section .init_array,"",@
+ .p2align 2
+ .int32 test_ctor
diff --git a/lld/test/wasm/Inputs/ctor-lib.s b/lld/test/wasm/Inputs/ctor-lib.s
new file mode 100644
index 000000000000..67dc0b02ee75
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-lib.s
@@ -0,0 +1,14 @@
+ .section .text.lib_func,"",@
+ .globl lib_func
+lib_func:
+ .functype lib_func () -> ()
+ end_function
+
+ .section .text.unused_lib_func,"",@
+ .globl unused_lib_func
+unused_lib_func:
+ .functype unused_lib_func () -> ()
+ call def
+ end_function
+
+ .functype def () -> ()
diff --git a/lld/test/wasm/Inputs/ctor-setup-call-def.s b/lld/test/wasm/Inputs/ctor-setup-call-def.s
new file mode 100644
index 000000000000..b0d09ddceeb7
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-setup-call-def.s
@@ -0,0 +1,21 @@
+# Like Inputs/ctor-setup.s, except it calls `def` instead of `lib_func`,
+# so it pulls in the .o file containing `ctor`.
+
+ .section .text._start,"",@
+ .globl _start
+_start:
+ .functype _start () -> ()
+ end_function
+
+ .section .text.setup,"",@
+ .globl setup
+setup:
+ .functype setup () -> ()
+ call def
+ end_function
+
+ .section .init_array,"",@
+ .p2align 2
+ .int32 setup
+
+.functype def () -> ()
diff --git a/lld/test/wasm/Inputs/ctor-setup.s b/lld/test/wasm/Inputs/ctor-setup.s
new file mode 100644
index 000000000000..814954842b5a
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-setup.s
@@ -0,0 +1,19 @@
+# Like Inputs/ctor-start.s, except it calls `lib_func` from a ctor
+# instead of from `_start`.
+
+ .globl _start
+_start:
+ .functype _start () -> ()
+ end_function
+
+ .globl setup
+setup:
+ .functype setup () -> ()
+ call lib_func
+ end_function
+
+ .section .init_array,"",@
+ .p2align 2
+ .int32 setup
+
+ .functype lib_func () -> ()
diff --git a/lld/test/wasm/Inputs/ctor-start.s b/lld/test/wasm/Inputs/ctor-start.s
new file mode 100644
index 000000000000..8f85fd2b2e13
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-start.s
@@ -0,0 +1,7 @@
+ .globl _start
+_start:
+ .functype _start () -> ()
+ call lib_func
+ end_function
+
+ .functype lib_func () -> ()
diff --git a/lld/test/wasm/ctor-gc-setup.test b/lld/test/wasm/ctor-gc-setup.test
new file mode 100644
index 000000000000..2076a42fd58c
--- /dev/null
+++ b/lld/test/wasm/ctor-gc-setup.test
@@ -0,0 +1,12 @@
+; Like ctor-gc.test, but main object calls a function from its constructor,
+; which shouldn't matter; `ctor` shouldn't be pulled in.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup.s -o %t.setup.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.setup.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK-NOT: Name: test_ctor
diff --git a/lld/test/wasm/ctor-gc.test b/lld/test/wasm/ctor-gc.test
new file mode 100644
index 000000000000..18deab54b4e3
--- /dev/null
+++ b/lld/test/wasm/ctor-gc.test
@@ -0,0 +1,12 @@
+; Verify that constructors from a .o file which it initially depends on but
+; doesn't ultimately contribute to the final link are not included.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-start.s -o %t.start.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.start.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK-NOT: __wasm_call_ctors
diff --git a/lld/test/wasm/ctor-no-gc.test b/lld/test/wasm/ctor-no-gc.test
new file mode 100644
index 000000000000..04e3fdcaea31
--- /dev/null
+++ b/lld/test/wasm/ctor-no-gc.test
@@ -0,0 +1,12 @@
+; Like ctor-gc-setup.test, but it calls a different function, so it does pull
+; in the object containing `ctor`, so `ctor` is linked in.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup-call-def.s -o %t.setup-call-def.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.setup-call-def.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK: Name: test_ctor
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index eb3205671af3..0abd47a0ac20 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -60,8 +60,14 @@ class InputFile {
MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
+ // An InputFile is considered live if any of the symbols defined by it
+ // are live.
+ void markLive() { live = true; }
+ bool isLive() const { return live; }
+
protected:
- InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
+ InputFile(Kind k, MemoryBufferRef m)
+ : mb(m), fileKind(k), live(!config->gcSections) {}
MemoryBufferRef mb;
// List of all symbols referenced or defined by this file.
@@ -69,6 +75,7 @@ class InputFile {
private:
const Kind fileKind;
+ bool live;
};
// .a file (ar archive)
@@ -92,6 +99,10 @@ class ObjFile : public InputFile {
explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
: InputFile(ObjectKind, m) {
this->archiveName = std::string(archiveName);
+
+ // If this isn't part of an archive, it's eagerly linked, so mark it live.
+ if (archiveName.empty())
+ markLive();
}
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
@@ -156,6 +167,10 @@ class BitcodeFile : public InputFile {
explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName)
: InputFile(BitcodeKind, m) {
this->archiveName = std::string(archiveName);
+
+ // If this isn't part of an archive, it's eagerly linked, so mark it live.
+ if (archiveName.empty())
+ markLive();
}
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
index 2766eec07ecb..4bce68877040 100644
--- a/lld/wasm/MarkLive.cpp
+++ b/lld/wasm/MarkLive.cpp
@@ -42,6 +42,7 @@ class MarkLive {
private:
void enqueue(Symbol *sym);
+ void enqueueInitFunctions(const ObjFile *sym);
void markSymbol(Symbol *sym);
void mark();
bool isCallCtorsLive();
@@ -56,11 +57,35 @@ void MarkLive::enqueue(Symbol *sym) {
if (!sym || sym->isLive())
return;
LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
+
+ InputFile *file = sym->getFile();
+ bool needInitFunctions = file && !file->isLive() && sym->isDefined();
+
sym->markLive();
+
+ // Mark ctor functions in the object that defines this symbol live.
+ // The ctor functions are all referenced by the synthetic callCtors
+ // function. However, this function does not contain relocations so we
+ // have to manually mark the ctors as live.
+ if (needInitFunctions)
+ enqueueInitFunctions(cast<ObjFile>(file));
+
if (InputChunk *chunk = sym->getChunk())
queue.push_back(chunk);
}
+// The ctor functions are all referenced by the synthetic callCtors
+// function. However, this function does not contain relocations so we
+// have to manually mark the ctors as live.
+void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
+ const WasmLinkingData &l = obj->getWasmObj()->linkingData();
+ for (const WasmInitFunc &f : l.InitFunctions) {
+ auto *initSym = obj->getFunctionSymbol(f.Symbol);
+ if (!initSym->isDiscarded())
+ enqueue(initSym);
+ }
+}
+
void MarkLive::run() {
// Add GC root symbols.
if (!config->entry.empty())
@@ -75,31 +100,24 @@ void MarkLive::run() {
if (Symbol *callDtors = WasmSym::callDtors)
enqueue(callDtors);
- // The ctor functions are all referenced by the synthetic callCtors
- // function. However, this function does not contain relocations so we
- // have to manually mark the ctors as live.
- for (const ObjFile *obj : symtab->objectFiles) {
- const WasmLinkingData &l = obj->getWasmObj()->linkingData();
- for (const WasmInitFunc &f : l.InitFunctions) {
- auto *initSym = obj->getFunctionSymbol(f.Symbol);
- if (!initSym->isDiscarded())
- enqueue(initSym);
- }
- }
-
// In Emscripten-style PIC, `__wasm_call_ctors` calls `__wasm_apply_relocs`.
if (config->isPic)
enqueue(WasmSym::applyRelocs);
- // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
- // live so that we assign it an index and call it.
- if (isCallCtorsLive())
- enqueue(WasmSym::callCtors);
-
if (config->sharedMemory && !config->shared)
enqueue(WasmSym::initMemory);
+ // Enqueue constructors in objects explicitly live from the command-line.
+ for (const ObjFile *obj : symtab->objectFiles)
+ if (obj->isLive())
+ enqueueInitFunctions(obj);
+
mark();
+
+ // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
+ // live so that we assign it an index and call it.
+ if (isCallCtorsLive())
+ WasmSym::callCtors->markLive();
}
void MarkLive::mark() {
@@ -181,9 +199,11 @@ bool MarkLive::isCallCtorsLive() {
// it can call them.
for (const ObjFile *file : symtab->objectFiles) {
const WasmLinkingData &l = file->getWasmObj()->linkingData();
- for (const WasmInitFunc &f : l.InitFunctions)
- if (!file->getFunctionSymbol(f.Symbol)->isDiscarded())
+ for (const WasmInitFunc &f : l.InitFunctions) {
+ auto *sym = file->getFunctionSymbol(f.Symbol);
+ if (!sym->isDiscarded() && sym->isLive())
return true;
+ }
}
return false;
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index d69ef00329c9..e92af6c07469 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -132,6 +132,8 @@ bool Symbol::isLive() const {
void Symbol::markLive() {
assert(!isDiscarded());
+ if (file != NULL)
+ file->markLive();
if (auto *g = dyn_cast<DefinedGlobal>(this))
g->global->live = true;
if (auto *e = dyn_cast<DefinedEvent>(this))
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 31618314cf52..aaa29744c326 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -1112,9 +1112,8 @@ void Writer::calculateInitFunctions() {
for (const WasmInitFunc &f : l.InitFunctions) {
FunctionSymbol *sym = file->getFunctionSymbol(f.Symbol);
// comdat exclusions can cause init functions be discarded.
- if (sym->isDiscarded())
+ if (sym->isDiscarded() || !sym->isLive())
continue;
- assert(sym->isLive());
if (sym->signature->Params.size() != 0)
error("constructor functions cannot take arguments: " + toString(*sym));
LLVM_DEBUG(dbgs() << "initFunctions: " << toString(*sym) << "\n");
More information about the llvm-commits
mailing list