[lld] dcf3368 - [lld][WebAssembly] Retain data segments referenced via __start/__stop

Yuta Saito via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 3 19:35:09 PDT 2022


Author: Yuta Saito
Date: 2022-06-04T02:28:31Z
New Revision: dcf3368e33c3a01bd21b692d3be5dc1ecee587f4

URL: https://github.com/llvm/llvm-project/commit/dcf3368e33c3a01bd21b692d3be5dc1ecee587f4
DIFF: https://github.com/llvm/llvm-project/commit/dcf3368e33c3a01bd21b692d3be5dc1ecee587f4.diff

LOG: [lld][WebAssembly] Retain data segments referenced via __start/__stop

As the ELF linker does, retain all data segments named X that are referenced
through `__start_X` or `__stop_X`.

For example, `FOO_MD` should not be stripped in the case below, but it is currently stripped by mistake:

```llvm
@FOO_MD  = global [4 x i8] c"bar\00", section "foo_md", align 1
@__start_foo_md = external constant i8*
@__stop_foo_md = external constant i8*
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @foo_md_size to i8*)], section "llvm.metadata"

define i32 @foo_md_size()  {
entry:
  ret i32 sub (
    i32 ptrtoint (i8** @__stop_foo_md to i32),
    i32 ptrtoint (i8** @__start_foo_md to i32)
  )
}
```

This fixes https://github.com/llvm/llvm-project/issues/55839

Reviewed By: sbc100

Differential Revision: https://reviews.llvm.org/D126950

Added: 
    lld/test/wasm/gc-sections-startstop.s

Modified: 
    lld/wasm/MarkLive.cpp

Removed: 
    


################################################################################
diff  --git a/lld/test/wasm/gc-sections-startstop.s b/lld/test/wasm/gc-sections-startstop.s
new file mode 100644
index 000000000000..f90164d743a9
--- /dev/null
+++ b/lld/test/wasm/gc-sections-startstop.s
@@ -0,0 +1,89 @@
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -o %t.o %s
+# RUN: wasm-ld %t.o -o %t.wasm
+# RUN: llvm-objdump -d --no-show-raw-insn %t.wasm | FileCheck %s
+
+# FOO_MD symbol is not used directly, but is referenced through __start/__stop_foo_md
+foo_md_size:
+	.functype	foo_md_size () -> (i32)
+	i32.const	__stop_foo_md
+	i32.const	__start_foo_md
+	i32.sub
+	end_function
+
+# CHECK: <foo_md_size>:
+# CHECK-EMPTY:
+# CHECK-NEXT: i32.const [[#STOP_ADDR:]]
+# CHECK-NEXT: i32.const [[#STOP_ADDR - 4]]
+# CHECK-NEXT: i32.sub
+
+# All segments in concat_section section are marked as live.
+concat_section_size:
+	.functype	concat_section_size () -> (i32)
+	i32.const	__stop_concat_section
+	i32.const	__start_concat_section
+	i32.sub
+	end_function
+
+# CHECK: <concat_section_size>:
+# CHECK-EMPTY:
+# CHECK-NEXT: i32.const [[#STOP_ADDR:]]
+# CHECK-NEXT: i32.const [[#STOP_ADDR - 8]]
+# CHECK-NEXT: i32.sub
+
+
+# __start/__stop symbols don't retain invalid C name sections
+invalid_name_section_size:
+	.functype	invalid_name_section_size () -> (i32)
+	i32.const	__stop_invalid.dot.name
+	i32.const	__start_invalid.dot.name
+	i32.sub
+	end_function
+
+# CHECK: <invalid_name_section_size>:
+# CHECK-EMPTY:
+# CHECK-NEXT: i32.const 0
+# CHECK-NEXT: i32.const 0
+# CHECK-NEXT: i32.sub
+
+
+	.globl	_start
+_start:
+	.functype	_start () -> ()
+	call	foo_md_size
+	drop
+	call	concat_section_size
+	drop
+	call	invalid_name_section_size
+	drop
+	end_function
+
+
+	.section	foo_md,"",@
+FOO_MD:
+	.asciz	"bar"
+	.size	FOO_MD, 4
+
+	.size	__start_foo_md, 4
+	.size	__stop_foo_md, 4
+
+
+	.section	concat_section,"",@
+concat_segment_1:
+	.asciz	"xxx"
+	.size	concat_segment_1, 4
+
+concat_segment_2:
+	.asciz	"yyy"
+	.size	concat_segment_2, 4
+
+	.size	__start_concat_section, 4
+	.size	__stop_concat_section, 4
+
+
+	.section	invalid.dot.name,"",@
+invalid_name_section:
+	.asciz	"fizz"
+	.size	invalid_name_section, 5
+
+	.weak	__start_invalid.dot.name
+	.weak	__stop_invalid.dot.name

diff  --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
index 3e5d234eeccc..c811dd4d2226 100644
--- a/lld/wasm/MarkLive.cpp
+++ b/lld/wasm/MarkLive.cpp
@@ -24,6 +24,7 @@
 #include "InputElement.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
+#include "lld/Common/Strings.h"
 
 #define DEBUG_TYPE "lld"
 
@@ -41,6 +42,7 @@ class MarkLive {
 
 private:
   void enqueue(Symbol *sym);
+  void enqueue(InputChunk *chunk);
   void enqueueInitFunctions(const ObjFile *sym);
   void mark();
   bool isCallCtorsLive();
@@ -84,6 +86,12 @@ void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
   }
 }
 
+void MarkLive::enqueue(InputChunk *chunk) {
+  LLVM_DEBUG(dbgs() << "markLive: " << chunk->getName() << "\n");
+  chunk->live = true;
+  queue.push_back(chunk);
+}
+
 void MarkLive::run() {
   // Add GC root symbols.
   if (!config->entry.empty())
@@ -97,10 +105,24 @@ void MarkLive::run() {
   if (WasmSym::callDtors)
     enqueue(WasmSym::callDtors);
 
-  // Enqueue constructors in objects explicitly live from the command-line.
-  for (const ObjFile *obj : symtab->objectFiles)
-    if (obj->isLive())
-      enqueueInitFunctions(obj);
+  for (const ObjFile *obj : symtab->objectFiles) {
+    if (!obj->isLive()) {
+      continue;
+    }
+    // Enqueue constructors in objects explicitly live from the command-line.
+    enqueueInitFunctions(obj);
+
+    // Enqueue data segments referenced through __start/__stop symbols.
+    for (InputChunk *segment : obj->segments) {
+      auto name = segment->name;
+      if (!isValidCIdentifier(name))
+        continue;
+      if (symtab->find(("__start_" + name).str()) ||
+          symtab->find(("__stop_" + name).str())) {
+        enqueue(segment);
+      }
+    }
+  }
 
   mark();
 


        


More information about the llvm-commits mailing list