[lld] [llvm] [WebAssembly] Add segment NO_STRIP flag to support private retained data (PR #81539)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 12 13:36:05 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-wasm

Author: Yuta Saito (kateinoigakukun)

<details>
<summary>Changes</summary>

In WebAssembly, we have the `WASM_SYMBOL_NO_STRIP` symbol flag to mark the referenced content as retained. However, that flag is not enough to express retained data that is not referenced by any symbol. This patch adds a new segment flag, `WASM_SEG_FLAG_NO_STRIP`, to support "private" linkage data that is retained by `llvm.used`.

Data of this kind, which is not referenced but must be retained, is usually used with encapsulation symbols (__start/__stop). The Swift runtime uses this technique and depends on the fact that "all metadata sections in live objects are retained", which was not guaranteed with `--gc-sections` before this patch.

This is a revised version of https://reviews.llvm.org/D126950 (which was reverted), based on @<!-- -->MaskRay's comments.

---
Full diff: https://github.com/llvm/llvm-project/pull/81539.diff


11 Files Affected:

- (added) lld/test/wasm/no-strip-segment.ll (+61) 
- (modified) lld/wasm/InputChunks.h (+1) 
- (modified) lld/wasm/MarkLive.cpp (+30-8) 
- (modified) llvm/include/llvm/BinaryFormat/Wasm.h (+1) 
- (modified) llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h (+3) 
- (modified) llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp (+21-5) 
- (modified) llvm/lib/MC/MCParser/WasmAsmParser.cpp (+3) 
- (modified) llvm/lib/MC/MCSectionWasm.cpp (+2) 
- (modified) llvm/lib/ObjectYAML/WasmYAML.cpp (+1) 
- (added) llvm/test/CodeGen/WebAssembly/no-strip.ll (+22) 
- (modified) llvm/test/MC/WebAssembly/no-dead-strip.ll (+62-14) 


``````````diff
diff --git a/lld/test/wasm/no-strip-segment.ll b/lld/test/wasm/no-strip-segment.ll
new file mode 100644
index 00000000000000..fb6ed65f4cee55
--- /dev/null
+++ b/lld/test/wasm/no-strip-segment.ll
@@ -0,0 +1,61 @@
+; RUN: split-file %s %t
+; RUN: llc -filetype=obj --mtriple=wasm32-unknown-unknown -o %t/main.o %t/main.ll
+; RUN: llc -filetype=obj --mtriple=wasm32-unknown-unknown -o %t/liba_x.o %t/liba_x.ll
+; RUN: llc -filetype=obj --mtriple=wasm32-unknown-unknown -o %t/liba_y.o %t/liba_y.ll
+; RUN: llvm-ar rcs %t/liba.a %t/liba_x.o %t/liba_y.o
+; RUN: wasm-ld %t/main.o %t/liba.a --gc-sections -o %t/main.wasm --print-gc-sections | FileCheck %s --check-prefix=GC
+; RUN: obj2yaml %t/main.wasm | FileCheck %s
+
+; --gc-sections should remove non-retained and unused "weathers" section from live object liba_x.o
+; GC: removing unused section {{.*}}/liba.a(liba_x.o):(weathers)
+; Should not remove retained "greetings" sections from live objects main.o and liba_x.o
+; GC-NOT: removing unused section %t/main.o:(greetings)
+; GC-NOT: removing unused section %t/liba_x.o:(greetings)
+
+; Note: All symbols are private so that they don't join the symbol table.
+
+;--- main.ll
+
+@greet_a = private constant [6 x i8] c"hello\00", align 1, section "greetings"
+@weather_a = private constant [7 x i8] c"cloudy\00", align 1, section "weathers"
+@llvm.used = appending global [2 x ptr] [ptr @greet_a, ptr @weather_a], section "llvm.metadata"
+
+declare void @grab_liba()
+define void @_start() {
+  call void @grab_liba()
+  ret void
+}
+
+;--- liba_x.ll
+
+@greet_b = private constant [6 x i8] c"world\00", align 1, section "greetings"
+@weather_b = private constant [6 x i8] c"rainy\00", align 1, section "weathers"
+
+@llvm.used = appending global [1 x ptr] [ptr @greet_b], section "llvm.metadata"
+
+define void @grab_liba() {
+  ret void
+}
+
+;--- liba_y.ll
+@greet_d = private constant [4 x i8] c"bye\00", align 1, section "greetings"
+
+@llvm.used = appending global [1 x ptr] [ptr @greet_d], section "llvm.metadata"
+
+
+; "greetings" section
+; CHECK: - Type:            DATA
+; CHECK:   Segments:
+; CHECK:     - SectionOffset:   7
+; CHECK:       InitFlags:       0
+; CHECK:       Offset:
+; CHECK:         Opcode:          I32_CONST
+; CHECK:         Value:           1024
+; CHECK:       Content:         68656C6C6F00776F726C6400
+; "weathers" section.
+; CHECK: - SectionOffset:   25
+; CHECK:   InitFlags:       0
+; CHECK:   Offset:
+; CHECK:     Opcode:          I32_CONST
+; CHECK:     Value:           1036
+; CHECK:   Content:         636C6F75647900
diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h
index ad1d45e335eac9..c07775872b68f3 100644
--- a/lld/wasm/InputChunks.h
+++ b/lld/wasm/InputChunks.h
@@ -81,6 +81,7 @@ class InputChunk {
   void generateRelocationCode(raw_ostream &os) const;
 
   bool isTLS() const { return flags & llvm::wasm::WASM_SEG_FLAG_TLS; }
+  bool isNoStrip() const { return flags & llvm::wasm::WASM_SEG_FLAG_NO_STRIP; }
 
   ObjFile *file;
   OutputSection *outputSec = nullptr;
diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
index b8ab7741ff1cb3..a45f0fd730d008 100644
--- a/lld/wasm/MarkLive.cpp
+++ b/lld/wasm/MarkLive.cpp
@@ -40,7 +40,9 @@ class MarkLive {
 
 private:
   void enqueue(Symbol *sym);
+  void enqueue(InputChunk *chunk);
   void enqueueInitFunctions(const ObjFile *sym);
+  void enqueueRetainedSegments(const ObjFile *file);
   void mark();
   bool isCallCtorsLive();
 
@@ -56,21 +58,30 @@ void MarkLive::enqueue(Symbol *sym) {
   LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
 
   InputFile *file = sym->getFile();
-  bool needInitFunctions = file && !file->isLive() && sym->isDefined();
+  bool needMarkImplicitDeps = file && !file->isLive() && sym->isDefined();
 
   sym->markLive();
 
-  // Mark ctor functions in the object that defines this symbol live.
-  // The ctor functions are all referenced by the synthetic callCtors
-  // function. However, this function does not contain relocations so we
-  // have to manually mark the ctors as live.
-  if (needInitFunctions)
+  if (needMarkImplicitDeps) {
+    // Mark ctor functions in the object that defines this symbol live.
+    // The ctor functions are all referenced by the synthetic callCtors
+    // function. However, this function does not contain relocations so we
+    // have to manually mark the ctors as live.
     enqueueInitFunctions(cast<ObjFile>(file));
+    // Mark retained segments in the object that defines this symbol live.
+    enqueueRetainedSegments(cast<ObjFile>(file));
+  }
 
   if (InputChunk *chunk = sym->getChunk())
     queue.push_back(chunk);
 }
 
+void MarkLive::enqueue(InputChunk *chunk) {
+  LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");
+  chunk->live = true;
+  queue.push_back(chunk);
+}
+
 // The ctor functions are all referenced by the synthetic callCtors
 // function.  However, this function does not contain relocations so we
 // have to manually mark the ctors as live.
@@ -83,6 +94,14 @@ void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
   }
 }
 
+// Mark segments flagged by segment-level no-strip. Segment-level no-strip is
+// usually used to retain segments without having symbol table entry.
+void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
+  for (InputChunk *chunk : file->segments)
+    if (chunk->isNoStrip())
+      enqueue(chunk);
+}
+
 void MarkLive::run() {
   // Add GC root symbols.
   if (!config->entry.empty())
@@ -96,10 +115,13 @@ void MarkLive::run() {
   if (WasmSym::callDtors)
     enqueue(WasmSym::callDtors);
 
-  // Enqueue constructors in objects explicitly live from the command-line.
   for (const ObjFile *obj : ctx.objectFiles)
-    if (obj->isLive())
+    if (obj->isLive()) {
+      // Enqueue constructors in objects explicitly live from the command-line.
       enqueueInitFunctions(obj);
+      // Enqueue retained segments in objects explicitly live from the command-line.
+      enqueueRetainedSegments(obj);
+    }
 
   mark();
 
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index aec6ea0b757799..ca2064d004f733 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -216,6 +216,7 @@ enum WasmSymbolType : unsigned {
 enum WasmSegmentFlag : unsigned {
   WASM_SEG_FLAG_STRINGS = 0x1,
   WASM_SEG_FLAG_TLS = 0x2,
+  WASM_SEG_FLAG_NO_STRIP = 0x4,
 };
 
 // Kinds of tag attributes.
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 4a7c1ca4a57182..8eef45ce565deb 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -207,11 +207,14 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
 
 class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile {
   mutable unsigned NextUniqueID = 0;
+  SmallPtrSet<GlobalObject *, 2> Used;
 
 public:
   TargetLoweringObjectFileWasm() = default;
   ~TargetLoweringObjectFileWasm() override = default;
 
+  void getModuleMetadata(Module &M) override;
+
   MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
                                       const TargetMachine &TM) const override;
 
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2c5b0b3d0c44c7..fc8185a5ee9ff2 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2141,7 +2141,7 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) {
   return C;
 }
 
-static unsigned getWasmSectionFlags(SectionKind K) {
+static unsigned getWasmSectionFlags(SectionKind K, bool Retain) {
   unsigned Flags = 0;
 
   if (K.isThreadLocal())
@@ -2150,11 +2150,24 @@ static unsigned getWasmSectionFlags(SectionKind K) {
   if (K.isMergeableCString())
     Flags |= wasm::WASM_SEG_FLAG_STRINGS;
 
+  if (Retain) {
+    Flags |= wasm::WASM_SEG_FLAG_NO_STRIP;
+  }
+
   // TODO(sbc): Add suport for K.isMergeableConst()
 
   return Flags;
 }
 
+void TargetLoweringObjectFileWasm::getModuleMetadata(Module &M) {
+  SmallVector<GlobalValue *, 4> Vec;
+  collectUsedGlobalVariables(M, Vec, false);
+  for (GlobalValue *GV : Vec)
+    if (auto *GO = dyn_cast<GlobalObject>(GV))
+      Used.insert(GO);
+}
+
+
 MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
   // We don't support explict section names for functions in the wasm object
@@ -2178,7 +2191,7 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
     Group = C->getName();
   }
 
-  unsigned Flags = getWasmSectionFlags(Kind);
+  unsigned Flags = getWasmSectionFlags(Kind, Used.count(GO));
   MCSectionWasm *Section = getContext().getWasmSection(
       Name, Kind, Flags, Group, MCContext::GenericSectionID);
 
@@ -2187,7 +2200,8 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
 
 static MCSectionWasm *selectWasmSectionForGlobal(
     MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
-    const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {
+    const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID,
+    bool Retain) {
   StringRef Group = "";
   if (const Comdat *C = getWasmComdat(GO)) {
     Group = C->getName();
@@ -2212,7 +2226,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
     (*NextUniqueID)++;
   }
 
-  unsigned Flags = getWasmSectionFlags(Kind);
+  unsigned Flags = getWasmSectionFlags(Kind, Retain);
   return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID);
 }
 
@@ -2230,9 +2244,11 @@ MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
   else
     EmitUniqueSection = TM.getDataSections();
   EmitUniqueSection |= GO->hasComdat();
+  bool Retain = Used.count(GO);
+  EmitUniqueSection |= Retain;
 
   return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
-                                    EmitUniqueSection, &NextUniqueID);
+                                    EmitUniqueSection, &NextUniqueID, Retain);
 }
 
 bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 97045495a60dec..cfc9d113196efe 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -115,6 +115,9 @@ class WasmAsmParser : public MCAsmParserExtension {
       case 'S':
         flags |= wasm::WASM_SEG_FLAG_STRINGS;
         break;
+      case 'R':
+        flags |= wasm::WASM_SEG_FLAG_NO_STRIP;
+        break;
       default:
         return -1U;
       }
diff --git a/llvm/lib/MC/MCSectionWasm.cpp b/llvm/lib/MC/MCSectionWasm.cpp
index e90f401b1efa1f..99e581c017fd16 100644
--- a/llvm/lib/MC/MCSectionWasm.cpp
+++ b/llvm/lib/MC/MCSectionWasm.cpp
@@ -70,6 +70,8 @@ void MCSectionWasm::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
     OS << 'S';
   if (SegmentFlags & wasm::WASM_SEG_FLAG_TLS)
     OS << 'T';
+  if (SegmentFlags & wasm::WASM_SEG_FLAG_NO_STRIP)
+    OS << 'R';
 
   OS << '"';
 
diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp
index 3b53788eddbabc..846970c875d105 100644
--- a/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -561,6 +561,7 @@ void ScalarBitSetTraits<WasmYAML::SegmentFlags>::bitset(
 #define BCase(X) IO.bitSetCase(Value, #X, wasm::WASM_SEG_FLAG_##X)
   BCase(STRINGS);
   BCase(TLS);
+  BCase(NO_STRIP);
 #undef BCase
 }
 
diff --git a/llvm/test/CodeGen/WebAssembly/no-strip.ll b/llvm/test/CodeGen/WebAssembly/no-strip.ll
new file mode 100644
index 00000000000000..e6206aebdce8e2
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/no-strip.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown | FileCheck %s
+
+@llvm.used = appending global [
+  5 x ptr
+] [
+  ptr @ga, ptr @gb, ptr @gc, ptr @gd, ptr @ge
+], section "llvm.metadata"
+
+; CHECK: .section        .data.ga,"R",@
+@ga = global i32 42
+; CHECK: .section        .data.gb,"R",@
+@gb = internal global i32 41
+; CHECK: .section        .data..Lgc,"R",@
+@gc = private global i32 40
+; CHECK: .section        .rodata.gd,"R",@
+@gd = constant i32 39
+
+; All sections with the same explicit name are flagged as retained if a part of them is retained.
+; CHECK: .section        dddd,"R",@
+@ge = global i32 38, section "dddd"
+; CHECK: .section        dddd,"R",@
+@gg = global i32 37, section "dddd"
diff --git a/llvm/test/MC/WebAssembly/no-dead-strip.ll b/llvm/test/MC/WebAssembly/no-dead-strip.ll
index 9b550ec6cefbc2..7ad9181cc662f4 100644
--- a/llvm/test/MC/WebAssembly/no-dead-strip.ll
+++ b/llvm/test/MC/WebAssembly/no-dead-strip.ll
@@ -1,21 +1,69 @@
-; RUN: llc -filetype=obj -wasm-keep-registers %s -o - | llvm-readobj --symbols - | FileCheck %s
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -filetype=obj -wasm-keep-registers -o - | obj2yaml - | FileCheck %s
 
-target triple = "wasm32-unknown-unknown"
-
-@llvm.used = appending global [1 x ptr] [ptr @foo], section "llvm.metadata"
+@llvm.used = appending global [5 x ptr] [
+  ptr @foo, ptr @gv0, ptr @gv1, ptr @gv2, ptr @gv3
+], section "llvm.metadata"
 
 define i32 @foo() {
 entry:
     ret i32 0
 }
 
-; CHECK:      Symbols [
-; CHECK-NEXT:   Symbol {
-; CHECK-NEXT:     Name: foo
-; CHECK-NEXT:     Type: FUNCTION (0x0)
-; CHECK-NEXT:     Flags [ (0x80)
-; CHECK-NEXT:       NO_STRIP (0x80)
-; CHECK-NEXT:     ]
-; CHECK-NEXT:     ElementIndex: 0x0
-; CHECK-NEXT:   }
-; CHECK-NEXT: ]
+; externally visible GV has NO_STRIP in both symtab entry and segment info
+@gv0 = global i32 42
+; internal GV has NO_STRIP in both symtab entry and segment info
+@gv1 = internal global i32 41
+; private GV has NO_STRIP in segment info only (no symtab entry)
+@gv2 = private global i32 40
+; explicit section names
+@gv3 = global i32 39, section "ddd.hello"
+@gv4.not.used = global i64 38, section "ddd.hello"
+
+; CHECK:         SymbolTable:
+; CHECK-NEXT:      - Index:           0
+; CHECK-NEXT:        Kind:            FUNCTION
+; CHECK-NEXT:        Name:            foo
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:        Function:        0
+; CHECK-NEXT:      - Index:           1
+; CHECK-NEXT:        Kind:            DATA
+; CHECK-NEXT:        Name:            gv0
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:        Segment:         0
+; CHECK-NEXT:        Size:            4
+; CHECK-NEXT:      - Index:           2
+; CHECK-NEXT:        Kind:            DATA
+; CHECK-NEXT:        Name:            gv1
+; CHECK-NEXT:        Flags:           [ BINDING_LOCAL, NO_STRIP ]
+; CHECK-NEXT:        Segment:         1
+; CHECK-NEXT:        Size:            4
+; CHECK-NEXT:      - Index:           3
+; CHECK-NEXT:        Kind:            DATA
+; CHECK-NEXT:        Name:            gv3
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:        Segment:         3
+; CHECK-NEXT:        Size:            4
+; CHECK-NEXT:      - Index:           4
+; CHECK-NEXT:        Kind:            DATA
+; CHECK-NEXT:        Name:            gv4.not.used
+; CHECK-NEXT:        Flags:           [  ]
+; CHECK-NEXT:        Segment:         3
+; CHECK-NEXT:        Offset:          8
+; CHECK-NEXT:        Size:            8
+; CHECK-NEXT:    SegmentInfo:
+; CHECK-NEXT:      - Index:           0
+; CHECK-NEXT:        Name:            .data.gv0
+; CHECK-NEXT:        Alignment:       2
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:      - Index:           1
+; CHECK-NEXT:        Name:            .data.gv1
+; CHECK-NEXT:        Alignment:       2
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:      - Index:           2
+; CHECK-NEXT:        Name:            .data..Lgv2
+; CHECK-NEXT:        Alignment:       2
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]
+; CHECK-NEXT:      - Index:           3
+; CHECK-NEXT:        Name:            ddd.hello
+; CHECK-NEXT:        Alignment:       3
+; CHECK-NEXT:        Flags:           [ NO_STRIP ]

``````````

</details>


https://github.com/llvm/llvm-project/pull/81539


More information about the llvm-commits mailing list