[lld] [WASM] wasm-ld: split up __wasm_apply_data_relocs (PR #129007)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 27 23:31:48 PST 2025


https://github.com/dmjio updated https://github.com/llvm/llvm-project/pull/129007

>From 005e14c1d30fb7250cf67038f4c83964f58c823e Mon Sep 17 00:00:00 2001
From: Cheng Shao <terrorjack at type.dance>
Date: Wed, 12 Feb 2025 02:27:08 +0000
Subject: [PATCH 1/2] wasm-ld: split up __wasm_apply_data_relocs

---
 lld/wasm/InputChunks.cpp       | 13 ++++--
 lld/wasm/InputChunks.h         |  2 +-
 lld/wasm/SyntheticSections.cpp |  2 +
 lld/wasm/Writer.cpp            | 74 +++++++++++++++++++++++++++-------
 4 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp
index ccdc92f5c8d71..40e1644de56e0 100644
--- a/lld/wasm/InputChunks.cpp
+++ b/lld/wasm/InputChunks.cpp
@@ -361,12 +361,11 @@ uint64_t InputChunk::getVA(uint64_t offset) const {
 // Generate code to apply relocations to the data section at runtime.
 // This is only called when generating shared libraries (PIC) where address are
 // not known at static link time.
-bool InputChunk::generateRelocationCode(raw_ostream &os) const {
+void InputChunk::generateRelocationCode(std::vector<std::string> &funcs) const {
   LLVM_DEBUG(dbgs() << "generating runtime relocations: " << name
                     << " count=" << relocations.size() << "\n");
 
   bool is64 = ctx.arg.is64.value_or(false);
-  bool generated = false;
   unsigned opcode_ptr_const = is64 ? WASM_OPCODE_I64_CONST
                                    : WASM_OPCODE_I32_CONST;
   unsigned opcode_ptr_add = is64 ? WASM_OPCODE_I64_ADD
@@ -385,6 +384,14 @@ bool InputChunk::generateRelocationCode(raw_ostream &os) const {
     if (!requiresRuntimeReloc)
       continue;
 
+    if (funcs.empty() || funcs.back().size() >= 7654300) {
+      funcs.emplace_back(std::string());
+      raw_string_ostream os(funcs.back());
+      writeUleb128(os, 0, "num locals");
+    }
+
+    raw_string_ostream os(funcs.back());
+
     LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type)
                       << " addend=" << rel.Addend << " index=" << rel.Index
                       << " output offset=" << offset << "\n");
@@ -439,9 +446,7 @@ bool InputChunk::generateRelocationCode(raw_ostream &os) const {
     writeU8(os, opcode_reloc_store, "I32_STORE");
     writeUleb128(os, 2, "align");
     writeUleb128(os, 0, "offset");
-    generated = true;
   }
-  return generated;
 }
 
 // Split WASM_SEG_FLAG_STRINGS section. Such a section is a sequence of
diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h
index f545449e1246f..d231382a5f5ef 100644
--- a/lld/wasm/InputChunks.h
+++ b/lld/wasm/InputChunks.h
@@ -78,7 +78,7 @@ class InputChunk {
 
   size_t getNumRelocations() const { return relocations.size(); }
   void writeRelocations(llvm::raw_ostream &os) const;
-  bool generateRelocationCode(raw_ostream &os) const;
+  void generateRelocationCode(std::vector<std::string> &funcs) const;
 
   bool isTLS() const { return flags & llvm::wasm::WASM_SEG_FLAG_TLS; }
   bool isRetained() const { return flags & llvm::wasm::WASM_SEG_FLAG_RETAIN; }
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 7fb44b9f0c009..3cab58ed16f93 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -299,6 +299,8 @@ void FunctionSection::writeBody() {
 void FunctionSection::addFunction(InputFunction *func) {
   if (!func->live)
     return;
+  if (func->hasFunctionIndex())
+    return;
   uint32_t functionIndex =
       out.importSec->getNumImportedFunctions() + inputFunctions.size();
   inputFunctions.emplace_back(func);
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 7770bdcfc1f16..dd3e94b4fabee 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -1459,20 +1459,21 @@ void Writer::createStartFunction() {
 void Writer::createApplyDataRelocationsFunction() {
   LLVM_DEBUG(dbgs() << "createApplyDataRelocationsFunction\n");
   // First write the body's contents to a string.
-  std::string bodyContent;
+  std::vector<std::string> funcs;
   {
-    raw_string_ostream os(bodyContent);
-    writeUleb128(os, 0, "num locals");
-    bool generated = false;
     for (const OutputSegment *seg : segments)
       if (!ctx.arg.sharedMemory || !seg->isTLS())
         for (const InputChunk *inSeg : seg->inputSegments)
-          generated |= inSeg->generateRelocationCode(os);
+          inSeg->generateRelocationCode(funcs);
+  }
 
-    if (!generated) {
-      LLVM_DEBUG(dbgs() << "skipping empty __wasm_apply_data_relocs\n");
-      return;
-    }
+  if (funcs.empty()) {
+    LLVM_DEBUG(dbgs() << "skipping empty __wasm_apply_data_relocs\n");
+    return;
+  }
+
+  for (auto &func : funcs) {
+    raw_string_ostream os(func);
     writeU8(os, WASM_OPCODE_END, "END");
   }
 
@@ -1485,24 +1486,67 @@ void Writer::createApplyDataRelocationsFunction() {
       make<SyntheticFunction>(nullSignature, "__wasm_apply_data_relocs"));
   def->markLive();
 
-  createFunction(def, bodyContent);
+  if (funcs.size() == 1) {
+    createFunction(def, funcs.back());
+    return;
+  }
+
+  std::string body;
+  {
+    raw_string_ostream os(body);
+    writeUleb128(os, 0, "num locals");
+
+    for (std::size_t i = 0; i < funcs.size(); ++i) {
+      auto &name =
+          *make<std::string>("__wasm_apply_data_relocs_" + std::to_string(i));
+      auto *func = make<SyntheticFunction>(nullSignature, name);
+      auto *def = symtab->addSyntheticFunction(
+          name, WASM_SYMBOL_VISIBILITY_HIDDEN, func);
+      def->markLive();
+      // Normally this shouldn't be called manually for a synthetic
+      // function, since the function indices in
+      // ctx.syntheticFunctions will be calculated later (check
+      // functionSec->addFunction call hierarchy for details).
+      // However, at this point we already need the correct index. The
+      // solution is to place the new synthetic function eagerly, and
+      // also to make addFunction idempotent by skipping when there's
+      // already a function index.
+      out.functionSec->addFunction(func);
+      createFunction(def, funcs[i]);
+
+      writeU8(os, WASM_OPCODE_CALL, "CALL");
+      writeUleb128(os, def->getFunctionIndex(), "function index");
+    }
+
+    writeU8(os, WASM_OPCODE_END, "END");
+  }
+  createFunction(def, body);
 }
 
 void Writer::createApplyTLSRelocationsFunction() {
   LLVM_DEBUG(dbgs() << "createApplyTLSRelocationsFunction\n");
-  std::string bodyContent;
+  std::vector<std::string> funcs;
   {
-    raw_string_ostream os(bodyContent);
-    writeUleb128(os, 0, "num locals");
     for (const OutputSegment *seg : segments)
       if (seg->isTLS())
         for (const InputChunk *inSeg : seg->inputSegments)
-          inSeg->generateRelocationCode(os);
+          inSeg->generateRelocationCode(funcs);
+  }
 
+  if (funcs.empty()) {
+    funcs.emplace_back(std::string());
+    raw_string_ostream os(funcs.back());
+    writeUleb128(os, 0, "num locals");
+  }
+
+  for (auto &func : funcs) {
+    raw_string_ostream os(func);
     writeU8(os, WASM_OPCODE_END, "END");
   }
 
-  createFunction(WasmSym::applyTLSRelocs, bodyContent);
+  assert(funcs.size() == 1);
+
+  createFunction(WasmSym::applyTLSRelocs, funcs.back());
 }
 
 // Similar to createApplyDataRelocationsFunction but generates relocation code

>From a4272a1d59a92dc2f91c1dc7e8507ad9ca73f679 Mon Sep 17 00:00:00 2001
From: dmjio <code at dmj.io>
Date: Thu, 27 Feb 2025 17:59:53 -0600
Subject: [PATCH 2/2] wasm-ld: constexpr size_t kV8MaxWasmFunctionSize =
 7'654'321;

---
 lld/wasm/InputChunks.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp
index 40e1644de56e0..31ebe3fd98de2 100644
--- a/lld/wasm/InputChunks.cpp
+++ b/lld/wasm/InputChunks.cpp
@@ -384,8 +384,10 @@ void InputChunk::generateRelocationCode(std::vector<std::string> &funcs) const {
     if (!requiresRuntimeReloc)
       continue;
 
-    if (funcs.empty() || funcs.back().size() >= 7654300) {
-      funcs.emplace_back(std::string());
+    // Function size limit; see https://www.w3.org/TR/wasm-js-api-2/#limits
+    // (in V8: constexpr size_t kV8MaxWasmFunctionSize = 7'654'321).
+    if (funcs.empty() || funcs.back().size() >= 7654321) {
+        funcs.emplace_back(std::string());
       raw_string_ostream os(funcs.back());
       writeUleb128(os, 0, "num locals");
     }



More information about the llvm-commits mailing list