[lld] 950ae43 - [WebAssembly] GC constructor functions in otherwise unused archive objects

Dan Gohman via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 12 18:55:27 PDT 2020


Author: Dan Gohman
Date: 2020-10-12T18:54:57-07:00
New Revision: 950ae43091121ea357f735790f4042264cf40728

URL: https://github.com/llvm/llvm-project/commit/950ae43091121ea357f735790f4042264cf40728
DIFF: https://github.com/llvm/llvm-project/commit/950ae43091121ea357f735790f4042264cf40728.diff

LOG: [WebAssembly] GC constructor functions in otherwise unused archive objects

This allows `__wasilibc_populate_libpreopen` to be GC'd in more cases
where it isn't needed, including when linked from Rust's libstd.

Differential Revision: https://reviews.llvm.org/D85062

Added: 
    lld/test/wasm/Inputs/ctor-ctor.s
    lld/test/wasm/Inputs/ctor-lib.s
    lld/test/wasm/Inputs/ctor-setup-call-def.s
    lld/test/wasm/Inputs/ctor-setup.s
    lld/test/wasm/Inputs/ctor-start.s
    lld/test/wasm/ctor-gc-setup.test
    lld/test/wasm/ctor-gc.test
    lld/test/wasm/ctor-no-gc.test

Modified: 
    lld/wasm/InputFiles.h
    lld/wasm/MarkLive.cpp
    lld/wasm/Symbols.cpp
    lld/wasm/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/test/wasm/Inputs/ctor-ctor.s b/lld/test/wasm/Inputs/ctor-ctor.s
new file mode 100644
index 000000000000..cf6218238c5f
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-ctor.s
@@ -0,0 +1,15 @@
+	.section	.text.def,"",@
+	.globl def
+def:
+	.functype	def () -> ()
+	end_function
+
+	.section	.text.test_ctor,"",@
+	.globl test_ctor
+test_ctor:
+	.functype	test_ctor () -> ()
+	end_function
+
+	.section	.init_array,"",@
+	.p2align	2
+	.int32 test_ctor

diff  --git a/lld/test/wasm/Inputs/ctor-lib.s b/lld/test/wasm/Inputs/ctor-lib.s
new file mode 100644
index 000000000000..67dc0b02ee75
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-lib.s
@@ -0,0 +1,14 @@
+	.section	.text.lib_func,"",@
+	.globl	lib_func
+lib_func:
+	.functype	lib_func () -> ()
+	end_function
+
+	.section	.text.unused_lib_func,"",@
+	.globl unused_lib_func
+unused_lib_func:
+	.functype	unused_lib_func () -> ()
+	call def
+	end_function
+
+	.functype	def () -> ()

diff  --git a/lld/test/wasm/Inputs/ctor-setup-call-def.s b/lld/test/wasm/Inputs/ctor-setup-call-def.s
new file mode 100644
index 000000000000..b0d09ddceeb7
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-setup-call-def.s
@@ -0,0 +1,21 @@
+# Like Inputs/ctor-setup.s, except it calls `def` instead of `lib_func`,
+# so it pulls in the .o file containing `ctor`.
+
+	.section	.text._start,"",@
+	.globl	_start
+_start:
+	.functype	_start () -> ()
+	end_function
+
+	.section	.text.setup,"",@
+	.globl setup
+setup:
+	.functype	setup () -> ()
+	call def
+	end_function
+
+	.section	.init_array,"",@
+	.p2align	2
+	.int32 setup
+
+.functype       def () -> ()

diff  --git a/lld/test/wasm/Inputs/ctor-setup.s b/lld/test/wasm/Inputs/ctor-setup.s
new file mode 100644
index 000000000000..814954842b5a
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-setup.s
@@ -0,0 +1,19 @@
+# Like Inputs/ctor-start.s, except it calls `lib_func` from a ctor
+# instead of from `_start`.
+
+	.globl	_start
+_start:
+	.functype	_start () -> ()
+	end_function
+
+	.globl	setup
+setup:
+	.functype	setup () -> ()
+	call	lib_func
+	end_function
+
+	.section	.init_array,"",@
+	.p2align	2
+	.int32	setup
+
+        .functype       lib_func () -> ()

diff  --git a/lld/test/wasm/Inputs/ctor-start.s b/lld/test/wasm/Inputs/ctor-start.s
new file mode 100644
index 000000000000..8f85fd2b2e13
--- /dev/null
+++ b/lld/test/wasm/Inputs/ctor-start.s
@@ -0,0 +1,7 @@
+	.globl _start
+_start:
+	.functype	_start () -> ()
+	call lib_func
+	end_function
+
+	.functype	lib_func () -> ()

diff  --git a/lld/test/wasm/ctor-gc-setup.test b/lld/test/wasm/ctor-gc-setup.test
new file mode 100644
index 000000000000..2076a42fd58c
--- /dev/null
+++ b/lld/test/wasm/ctor-gc-setup.test
@@ -0,0 +1,12 @@
+; Like ctor-gc.test, but main object calls a function from its constructor,
+; which shouldn't matter; `ctor` shouldn't be pulled in.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup.s -o %t.setup.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.setup.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK-NOT: Name: test_ctor

diff  --git a/lld/test/wasm/ctor-gc.test b/lld/test/wasm/ctor-gc.test
new file mode 100644
index 000000000000..18deab54b4e3
--- /dev/null
+++ b/lld/test/wasm/ctor-gc.test
@@ -0,0 +1,12 @@
+; Verify that constructors from a .o file which it initially depends on but
+; doesn't ultimately contribute to the final link are not included.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-start.s -o %t.start.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.start.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK-NOT: __wasm_call_ctors

diff  --git a/lld/test/wasm/ctor-no-gc.test b/lld/test/wasm/ctor-no-gc.test
new file mode 100644
index 000000000000..04e3fdcaea31
--- /dev/null
+++ b/lld/test/wasm/ctor-no-gc.test
@@ -0,0 +1,12 @@
+; Like ctor-gc-setup.test, but it calls a different function, so it does pull
+; in the object containing `ctor`, so `ctor` is linked in.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup-call-def.s -o %t.setup-call-def.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.setup-call-def.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK: Name: test_ctor

diff  --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index eb3205671af3..0abd47a0ac20 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -60,8 +60,14 @@ class InputFile {
 
   MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
 
+  // An InputFile is considered live if any of the symbols defined by it
+  // are live.
+  void markLive() { live = true; }
+  bool isLive() const { return live; }
+
 protected:
-  InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
+  InputFile(Kind k, MemoryBufferRef m)
+      : mb(m), fileKind(k), live(!config->gcSections) {}
   MemoryBufferRef mb;
 
   // List of all symbols referenced or defined by this file.
@@ -69,6 +75,7 @@ class InputFile {
 
 private:
   const Kind fileKind;
+  bool live;
 };
 
 // .a file (ar archive)
@@ -92,6 +99,10 @@ class ObjFile : public InputFile {
   explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
       : InputFile(ObjectKind, m) {
     this->archiveName = std::string(archiveName);
+
+    // If this isn't part of an archive, it's eagerly linked, so mark it live.
+    if (archiveName.empty())
+      markLive();
   }
   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
 
@@ -156,6 +167,10 @@ class BitcodeFile : public InputFile {
   explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName)
       : InputFile(BitcodeKind, m) {
     this->archiveName = std::string(archiveName);
+
+    // If this isn't part of an archive, it's eagerly linked, so mark it live.
+    if (archiveName.empty())
+      markLive();
   }
   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
 

diff  --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
index 2766eec07ecb..4bce68877040 100644
--- a/lld/wasm/MarkLive.cpp
+++ b/lld/wasm/MarkLive.cpp
@@ -42,6 +42,7 @@ class MarkLive {
 
 private:
   void enqueue(Symbol *sym);
+  void enqueueInitFunctions(const ObjFile *sym);
   void markSymbol(Symbol *sym);
   void mark();
   bool isCallCtorsLive();
@@ -56,11 +57,35 @@ void MarkLive::enqueue(Symbol *sym) {
   if (!sym || sym->isLive())
     return;
   LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
+
+  InputFile *file = sym->getFile();
+  bool needInitFunctions = file && !file->isLive() && sym->isDefined();
+
   sym->markLive();
+
+  // Mark ctor functions in the object that defines this symbol live.
+  // The ctor functions are all referenced by the synthetic callCtors
+  // function. However, this function does not contain relocations so we
+  // have to manually mark the ctors as live.
+  if (needInitFunctions)
+    enqueueInitFunctions(cast<ObjFile>(file));
+
   if (InputChunk *chunk = sym->getChunk())
     queue.push_back(chunk);
 }
 
+// The ctor functions are all referenced by the synthetic callCtors
+// function.  However, this function does not contain relocations so we
+// have to manually mark the ctors as live.
+void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
+  const WasmLinkingData &l = obj->getWasmObj()->linkingData();
+  for (const WasmInitFunc &f : l.InitFunctions) {
+    auto *initSym = obj->getFunctionSymbol(f.Symbol);
+    if (!initSym->isDiscarded())
+      enqueue(initSym);
+  }
+}
+
 void MarkLive::run() {
   // Add GC root symbols.
   if (!config->entry.empty())
@@ -75,31 +100,24 @@ void MarkLive::run() {
   if (Symbol *callDtors = WasmSym::callDtors)
     enqueue(callDtors);
 
-  // The ctor functions are all referenced by the synthetic callCtors
-  // function.  However, this function does not contain relocations so we
-  // have to manually mark the ctors as live.
-  for (const ObjFile *obj : symtab->objectFiles) {
-    const WasmLinkingData &l = obj->getWasmObj()->linkingData();
-    for (const WasmInitFunc &f : l.InitFunctions) {
-      auto *initSym = obj->getFunctionSymbol(f.Symbol);
-      if (!initSym->isDiscarded())
-        enqueue(initSym);
-    }
-  }
-
   // In Emscripten-style PIC, `__wasm_call_ctors` calls `__wasm_apply_relocs`.
   if (config->isPic)
     enqueue(WasmSym::applyRelocs);
 
-  // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
-  // live so that we assign it an index and call it.
-  if (isCallCtorsLive())
-    enqueue(WasmSym::callCtors);
-
   if (config->sharedMemory && !config->shared)
     enqueue(WasmSym::initMemory);
 
+  // Enqueue constructors in objects explicitly live from the command-line.
+  for (const ObjFile *obj : symtab->objectFiles)
+    if (obj->isLive())
+      enqueueInitFunctions(obj);
+
   mark();
+
+  // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
+  // live so that we assign it an index and call it.
+  if (isCallCtorsLive())
+    WasmSym::callCtors->markLive();
 }
 
 void MarkLive::mark() {
@@ -181,9 +199,11 @@ bool MarkLive::isCallCtorsLive() {
   // it can call them.
   for (const ObjFile *file : symtab->objectFiles) {
     const WasmLinkingData &l = file->getWasmObj()->linkingData();
-    for (const WasmInitFunc &f : l.InitFunctions)
-      if (!file->getFunctionSymbol(f.Symbol)->isDiscarded())
+    for (const WasmInitFunc &f : l.InitFunctions) {
+      auto *sym = file->getFunctionSymbol(f.Symbol);
+      if (!sym->isDiscarded() && sym->isLive())
         return true;
+    }
   }
 
   return false;

diff  --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index d69ef00329c9..e92af6c07469 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -132,6 +132,8 @@ bool Symbol::isLive() const {
 
 void Symbol::markLive() {
   assert(!isDiscarded());
+  if (file != NULL)
+    file->markLive();
   if (auto *g = dyn_cast<DefinedGlobal>(this))
     g->global->live = true;
   if (auto *e = dyn_cast<DefinedEvent>(this))

diff  --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 31618314cf52..aaa29744c326 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -1112,9 +1112,8 @@ void Writer::calculateInitFunctions() {
     for (const WasmInitFunc &f : l.InitFunctions) {
       FunctionSymbol *sym = file->getFunctionSymbol(f.Symbol);
       // comdat exclusions can cause init functions be discarded.
-      if (sym->isDiscarded())
+      if (sym->isDiscarded() || !sym->isLive())
         continue;
-      assert(sym->isLive());
       if (sym->signature->Params.size() != 0)
         error("constructor functions cannot take arguments: " + toString(*sym));
       LLVM_DEBUG(dbgs() << "initFunctions: " << toString(*sym) << "\n");


        


More information about the llvm-commits mailing list