[lld] [llvm] [WebAssembly] Add segment NO_STRIP flag to support private retained data (PR #81539)

Yuta Saito via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 13 01:39:02 PST 2024


https://github.com/kateinoigakukun updated https://github.com/llvm/llvm-project/pull/81539

>From a37d9c96d5ed7a0cba8e89694d91a0fa35eee273 Mon Sep 17 00:00:00 2001
From: Yuta Saito <kateinoigakukun at gmail.com>
Date: Thu, 1 Feb 2024 06:32:42 +0000
Subject: [PATCH] [WebAssembly] Add segment RETAIN flag to support private
 retained data

In WebAssembly, we have the `WASM_SYMBOL_NO_STRIP` symbol flag to mark the
referenced content as retained. However, that flag is not enough to
express retained data that is not referenced by any symbol.
This patch adds a new segment flag, `WASM_SEG_FLAG_RETAIN`, to support
"private" linkage data that is retained by llvm.used.

Data of this kind, which is not referenced but must be retained, is usually
used with encapsulation symbols (__start/__stop). The Swift runtime uses
this technique and depends on the fact that "all metadata sections in live
objects are retained", which was not guaranteed with `--gc-sections`
before this patch.
---
 lld/test/wasm/no-strip-segment.s              | 62 +++++++++++++++
 lld/wasm/InputChunks.h                        |  1 +
 lld/wasm/MarkLive.cpp                         | 39 ++++++++--
 llvm/include/llvm/BinaryFormat/Wasm.h         |  1 +
 .../CodeGen/TargetLoweringObjectFileImpl.h    |  3 +
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  | 29 +++++--
 llvm/lib/MC/MCParser/WasmAsmParser.cpp        |  3 +
 llvm/lib/MC/MCSectionWasm.cpp                 |  2 +
 llvm/lib/ObjectYAML/WasmYAML.cpp              |  1 +
 llvm/test/CodeGen/WebAssembly/no-strip.ll     | 22 ++++++
 llvm/test/MC/WebAssembly/no-dead-strip.ll     | 76 +++++++++++++++----
 11 files changed, 210 insertions(+), 29 deletions(-)
 create mode 100644 lld/test/wasm/no-strip-segment.s
 create mode 100644 llvm/test/CodeGen/WebAssembly/no-strip.ll

diff --git a/lld/test/wasm/no-strip-segment.s b/lld/test/wasm/no-strip-segment.s
new file mode 100644
index 00000000000000..57bbf0bb9f24fa
--- /dev/null
+++ b/lld/test/wasm/no-strip-segment.s
@@ -0,0 +1,62 @@
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/main.o %t/main.s
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/liba_x.o %t/liba_x.s
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/liba_y.o %t/liba_y.s
+# RUN: rm -f %t/liba.a
+# RUN: llvm-ar rcs %t/liba.a %t/liba_x.o %t/liba_y.o
+# RUN: wasm-ld %t/main.o %t/liba.a --gc-sections -o %t/main.wasm --print-gc-sections | FileCheck %s --check-prefix=GC
+# RUN: obj2yaml %t/main.wasm | FileCheck %s
+
+# --gc-sections should remove non-retained and unused "weathers" section from live object liba_x.o
+# GC: removing unused section {{.*}}/liba.a(liba_x.o):(weathers)
+# Should not remove retained "greetings" sections from live objects main.o and liba_x.o
+# GC-NOT: removing unused section %t/main.o:(greetings)
+# GC-NOT: removing unused section %t/liba_x.o:(greetings)
+
+# Note: All symbols are private so that they don't join the symbol table.
+
+#--- main.s
+  .functype grab_liba () -> ()
+  .globl  _start
+_start:
+  .functype _start () -> ()
+  call grab_liba
+  end_function
+
+  .section greetings,"R",@
+  .asciz  "hello"
+  .section weathers,"R",@
+  .asciz  "cloudy"
+
+#--- liba_x.s
+  .globl  grab_liba
+grab_liba:
+  .functype grab_liba () -> ()
+  end_function
+
+  .section greetings,"R",@
+  .asciz  "world"
+  .section weathers,"",@
+  .asciz  "rainy"
+
+#--- liba_y.s
+        .section        greetings,"R",@
+        .asciz  "bye"
+
+
+# "greetings" section
+# CHECK: - Type:            DATA
+# CHECK:   Segments:
+# CHECK:     - SectionOffset:   7
+# CHECK:       InitFlags:       0
+# CHECK:       Offset:
+# CHECK:         Opcode:          I32_CONST
+# CHECK:         Value:           1024
+# CHECK:       Content:         68656C6C6F00776F726C6400
+# "weathers" section.
+# CHECK: - SectionOffset:   25
+# CHECK:   InitFlags:       0
+# CHECK:   Offset:
+# CHECK:     Opcode:          I32_CONST
+# CHECK:     Value:           1036
+# CHECK:   Content:         636C6F75647900
diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h
index ad1d45e335eac9..cf8a5249b19a00 100644
--- a/lld/wasm/InputChunks.h
+++ b/lld/wasm/InputChunks.h
@@ -81,6 +81,7 @@ class InputChunk {
   void generateRelocationCode(raw_ostream &os) const;
 
   bool isTLS() const { return flags & llvm::wasm::WASM_SEG_FLAG_TLS; }
+  bool isRetained() const { return flags & llvm::wasm::WASM_SEG_FLAG_RETAIN; }
 
   ObjFile *file;
   OutputSection *outputSec = nullptr;
diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
index b8ab7741ff1cb3..0f1c50854fdfd1 100644
--- a/lld/wasm/MarkLive.cpp
+++ b/lld/wasm/MarkLive.cpp
@@ -40,7 +40,9 @@ class MarkLive {
 
 private:
   void enqueue(Symbol *sym);
+  void enqueue(InputChunk *chunk);
   void enqueueInitFunctions(const ObjFile *sym);
+  void enqueueRetainedSegments(const ObjFile *file);
   void mark();
   bool isCallCtorsLive();
 
@@ -56,21 +58,30 @@ void MarkLive::enqueue(Symbol *sym) {
   LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
 
   InputFile *file = sym->getFile();
-  bool needInitFunctions = file && !file->isLive() && sym->isDefined();
+  bool markImplicitDeps = file && !file->isLive() && sym->isDefined();
 
   sym->markLive();
 
-  // Mark ctor functions in the object that defines this symbol live.
-  // The ctor functions are all referenced by the synthetic callCtors
-  // function. However, this function does not contain relocations so we
-  // have to manually mark the ctors as live.
-  if (needInitFunctions)
+  if (markImplicitDeps) {
+    // Mark ctor functions in the object that defines this symbol live.
+    // The ctor functions are all referenced by the synthetic callCtors
+    // function. However, this function does not contain relocations so we
+    // have to manually mark the ctors as live.
     enqueueInitFunctions(cast<ObjFile>(file));
+    // Mark retained segments in the object that defines this symbol live.
+    enqueueRetainedSegments(cast<ObjFile>(file));
+  }
 
   if (InputChunk *chunk = sym->getChunk())
     queue.push_back(chunk);
 }
 
+void MarkLive::enqueue(InputChunk *chunk) {
+  LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");
+  chunk->live = true;
+  queue.push_back(chunk);
+}
+
 // The ctor functions are all referenced by the synthetic callCtors
 // function.  However, this function does not contain relocations so we
 // have to manually mark the ctors as live.
@@ -83,6 +94,14 @@ void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
   }
 }
 
+// Mark segments flagged by segment-level no-strip. Segment-level no-strip is
+// usually used to retain segments without having a symbol table entry.
+void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
+  for (InputChunk *chunk : file->segments)
+    if (chunk->isRetained())
+      enqueue(chunk);
+}
+
 void MarkLive::run() {
   // Add GC root symbols.
   if (!config->entry.empty())
@@ -96,10 +115,14 @@ void MarkLive::run() {
   if (WasmSym::callDtors)
     enqueue(WasmSym::callDtors);
 
-  // Enqueue constructors in objects explicitly live from the command-line.
   for (const ObjFile *obj : ctx.objectFiles)
-    if (obj->isLive())
+    if (obj->isLive()) {
+      // Enqueue constructors in objects explicitly live from the command-line.
       enqueueInitFunctions(obj);
+      // Enqueue retained segments in objects explicitly live from the
+      // command-line.
+      enqueueRetainedSegments(obj);
+    }
 
   mark();
 
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index aec6ea0b757799..aa9609091e08c8 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -216,6 +216,7 @@ enum WasmSymbolType : unsigned {
 enum WasmSegmentFlag : unsigned {
   WASM_SEG_FLAG_STRINGS = 0x1,
   WASM_SEG_FLAG_TLS = 0x2,
+  WASM_SEG_FLAG_RETAIN = 0x4,
 };
 
 // Kinds of tag attributes.
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 4a7c1ca4a57182..8eef45ce565deb 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -207,11 +207,14 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
 
 class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile {
   mutable unsigned NextUniqueID = 0;
+  SmallPtrSet<GlobalObject *, 2> Used;
 
 public:
   TargetLoweringObjectFileWasm() = default;
   ~TargetLoweringObjectFileWasm() override = default;
 
+  void getModuleMetadata(Module &M) override;
+
   MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
                                       const TargetMachine &TM) const override;
 
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2c5b0b3d0c44c7..0cc05a6846c208 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2141,7 +2141,7 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) {
   return C;
 }
 
-static unsigned getWasmSectionFlags(SectionKind K) {
+static unsigned getWasmSectionFlags(SectionKind K, bool Retain) {
   unsigned Flags = 0;
 
   if (K.isThreadLocal())
@@ -2150,11 +2150,22 @@ static unsigned getWasmSectionFlags(SectionKind K) {
   if (K.isMergeableCString())
     Flags |= wasm::WASM_SEG_FLAG_STRINGS;
 
+  if (Retain)
+    Flags |= wasm::WASM_SEG_FLAG_RETAIN;
+
   // TODO(sbc): Add suport for K.isMergeableConst()
 
   return Flags;
 }
 
+void TargetLoweringObjectFileWasm::getModuleMetadata(Module &M) {
+  SmallVector<GlobalValue *, 4> Vec;
+  collectUsedGlobalVariables(M, Vec, false);
+  for (GlobalValue *GV : Vec)
+    if (auto *GO = dyn_cast<GlobalObject>(GV))
+      Used.insert(GO);
+}
+
 MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
   // We don't support explict section names for functions in the wasm object
@@ -2178,16 +2189,18 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
     Group = C->getName();
   }
 
-  unsigned Flags = getWasmSectionFlags(Kind);
+  unsigned Flags = getWasmSectionFlags(Kind, Used.count(GO));
   MCSectionWasm *Section = getContext().getWasmSection(
       Name, Kind, Flags, Group, MCContext::GenericSectionID);
 
   return Section;
 }
 
-static MCSectionWasm *selectWasmSectionForGlobal(
-    MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
-    const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {
+static MCSectionWasm *
+selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
+                           SectionKind Kind, Mangler &Mang,
+                           const TargetMachine &TM, bool EmitUniqueSection,
+                           unsigned *NextUniqueID, bool Retain) {
   StringRef Group = "";
   if (const Comdat *C = getWasmComdat(GO)) {
     Group = C->getName();
@@ -2212,7 +2225,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
     (*NextUniqueID)++;
   }
 
-  unsigned Flags = getWasmSectionFlags(Kind);
+  unsigned Flags = getWasmSectionFlags(Kind, Retain);
   return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID);
 }
 
@@ -2230,9 +2243,11 @@ MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
   else
     EmitUniqueSection = TM.getDataSections();
   EmitUniqueSection |= GO->hasComdat();
+  bool Retain = Used.count(GO);
+  EmitUniqueSection |= Retain;
 
   return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
-                                    EmitUniqueSection, &NextUniqueID);
+                                    EmitUniqueSection, &NextUniqueID, Retain);
 }
 
 bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 97045495a60dec..b95ee33debc37b 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -115,6 +115,9 @@ class WasmAsmParser : public MCAsmParserExtension {
       case 'S':
         flags |= wasm::WASM_SEG_FLAG_STRINGS;
         break;
+      case 'R':
+        flags |= wasm::WASM_SEG_FLAG_RETAIN;
+        break;
       default:
         return -1U;
       }
diff --git a/llvm/lib/MC/MCSectionWasm.cpp b/llvm/lib/MC/MCSectionWasm.cpp
index e90f401b1efa1f..e3761820bb4c3e 100644
--- a/llvm/lib/MC/MCSectionWasm.cpp
+++ b/llvm/lib/MC/MCSectionWasm.cpp
@@ -70,6 +70,8 @@ void MCSectionWasm::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
     OS << 'S';
   if (SegmentFlags & wasm::WASM_SEG_FLAG_TLS)
     OS << 'T';
+  if (SegmentFlags & wasm::WASM_SEG_FLAG_RETAIN)
+    OS << 'R';
 
   OS << '"';
 
diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp
index 3b53788eddbabc..544a91d03dce01 100644
--- a/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -561,6 +561,7 @@ void ScalarBitSetTraits<WasmYAML::SegmentFlags>::bitset(
 #define BCase(X) IO.bitSetCase(Value, #X, wasm::WASM_SEG_FLAG_##X)
   BCase(STRINGS);
   BCase(TLS);
+  BCase(RETAIN);
 #undef BCase
 }
 
diff --git a/llvm/test/CodeGen/WebAssembly/no-strip.ll b/llvm/test/CodeGen/WebAssembly/no-strip.ll
new file mode 100644
index 00000000000000..e6206aebdce8e2
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/no-strip.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown | FileCheck %s
+
+ at llvm.used = appending global [
+  5 x ptr
+] [
+  ptr @ga, ptr @gb, ptr @gc, ptr @gd, ptr @ge
+], section "llvm.metadata"
+
+; CHECK: .section        .data.ga,"R",@
+ at ga = global i32 42
+; CHECK: .section        .data.gb,"R",@
+ at gb = internal global i32 41
+; CHECK: .section        .data..Lgc,"R",@
+ at gc = private global i32 40
+; CHECK: .section        .rodata.gd,"R",@
+ at gd = constant i32 39
+
+; All sections with the same explicit name are flagged as retained if a part of them is retained.
+; CHECK: .section        dddd,"R",@
+ at ge = global i32 38, section "dddd"
+; CHECK: .section        dddd,"R",@
+ at gg = global i32 37, section "dddd"
diff --git a/llvm/test/MC/WebAssembly/no-dead-strip.ll b/llvm/test/MC/WebAssembly/no-dead-strip.ll
index 9b550ec6cefbc2..6b3f090d9cab8f 100644
--- a/llvm/test/MC/WebAssembly/no-dead-strip.ll
+++ b/llvm/test/MC/WebAssembly/no-dead-strip.ll
@@ -1,21 +1,69 @@
-; RUN: llc -filetype=obj -wasm-keep-registers %s -o - | llvm-readobj --symbols - | FileCheck %s
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -filetype=obj -wasm-keep-registers -o - | obj2yaml - | FileCheck %s
 
-target triple = "wasm32-unknown-unknown"
-
- at llvm.used = appending global [1 x ptr] [ptr @foo], section "llvm.metadata"
+ at llvm.used = appending global [5 x ptr] [
+  ptr @foo, ptr @gv0, ptr @gv1, ptr @gv2, ptr @gv3
+], section "llvm.metadata"
 
 define i32 @foo() {
 entry:
     ret i32 0
 }
 
-; CHECK:      Symbols [
-; CHECK-NEXT:   Symbol {
-; CHECK-NEXT:     Name: foo
-; CHECK-NEXT:     Type: FUNCTION (0x0)
-; CHECK-NEXT:     Flags [ (0x80)
-; CHECK-NEXT:       NO_STRIP (0x80)
-; CHECK-NEXT:     ]
-; CHECK-NEXT:     ElementIndex: 0x0
-; CHECK-NEXT:   }
-; CHECK-NEXT: ]
+; externally visible GV has NO_STRIP/RETAIN in both symtab entry and segment info
+ at gv0 = global i32 42
+; internal GV has NO_STRIP/RETAIN in both symtab entry and segment info
+ at gv1 = internal global i32 41
+; private GV has RETAIN in segment info only (no symtab entry)
+ at gv2 = private global i32 40
+; explicit section names
+ at gv3 = global i32 39, section "ddd.hello"
+ at gv4.not.used = global i64 38, section "ddd.hello"
+
+; CHECK:         SymbolTable:
+; CHECK-NEXT:      - Index:           0
+; CHECK-NEXT:        Kind:            FUNCTION
+; CHECK-NEXT:        Name:            foo
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:        Function:        0
+; CHECK-NEXT:      - Index:           1
+; CHECK-NEXT:        Kind:            DATA
+; CHECK-NEXT:        Name:            gv0
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:        Segment:         0
+; CHECK-NEXT:        Size:            4
+; CHECK-NEXT:      - Index:           2
+; CHECK-NEXT:        Kind:            DATA
+; CHECK-NEXT:        Name:            gv1
+; CHECK-NEXT:        Flags:           [ BINDING_LOCAL, NO_STRIP ]
+; CHECK-NEXT:        Segment:         1
+; CHECK-NEXT:        Size:            4
+; CHECK-NEXT:      - Index:           3
+; CHECK-NEXT:        Kind:            DATA
+; CHECK-NEXT:        Name:            gv3
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:        Segment:         3
+; CHECK-NEXT:        Size:            4
+; CHECK-NEXT:      - Index:           4
+; CHECK-NEXT:        Kind:            DATA
+; CHECK-NEXT:        Name:            gv4.not.used
+; CHECK-NEXT:        Flags:           [  ]
+; CHECK-NEXT:        Segment:         3
+; CHECK-NEXT:        Offset:          8
+; CHECK-NEXT:        Size:            8
+; CHECK-NEXT:    SegmentInfo:
+; CHECK-NEXT:      - Index:           0
+; CHECK-NEXT:        Name:            .data.gv0
+; CHECK-NEXT:        Alignment:       2
+; CHECK-NEXT:        Flags:           [ RETAIN ]
+; CHECK-NEXT:      - Index:           1
+; CHECK-NEXT:        Name:            .data.gv1
+; CHECK-NEXT:        Alignment:       2
+; CHECK-NEXT:        Flags:           [ RETAIN ]
+; CHECK-NEXT:      - Index:           2
+; CHECK-NEXT:        Name:            .data..Lgv2
+; CHECK-NEXT:        Alignment:       2
+; CHECK-NEXT:        Flags:           [ RETAIN ]
+; CHECK-NEXT:      - Index:           3
+; CHECK-NEXT:        Name:            ddd.hello
+; CHECK-NEXT:        Alignment:       3
+; CHECK-NEXT:        Flags:           [ RETAIN ]



More information about the llvm-commits mailing list