[lld] [llvm] ldd(wasm): Support for the custom-page-sizes WebAssembly proposal (PR #128942)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 12:37:12 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-binary-utilities

Author: Nick Fitzgerald (fitzgen)

<details>
<summary>Changes</summary>

This commit adds support for WebAssembly's custom-page-sizes proposal to `wasm-ld`. An overview of the proposal can be found [here](https://github.com/WebAssembly/custom-page-sizes/blob/main/proposals/custom-page-sizes/Overview.md). In a sentence, it allows customizing a Wasm memory's page size, enabling Wasm to target environments with less than 64KiB of memory (the default Wasm page size) available for Wasm memories.

This commit contains the following:

* Adds a `--page-size=N` CLI flag to `wasm-ld` for configuring the linked Wasm binary's linear memory's page size.

* When the page size is configured to a non-default value, then the final Wasm binary will use the encodings defined in the custom-page-sizes proposal to declare the linear memory's page size.

* Defines a `__wasm_first_page_end` symbol, whose address points to the first page in the Wasm linear memory, a.k.a. is the Wasm memory's page size. This allows writing code that is compatible with any page size, and doesn't require re-compiling its object code. At the same time, because it just lowers to a constant rather than a memory access or something, it enables link-time optimization.

* Adds tests for these new features.

r? @<!-- -->sbc100 

cc @<!-- -->sunfishcode 

---
Full diff: https://github.com/llvm/llvm-project/pull/128942.diff


18 Files Affected:

- (added) lld/test/wasm/page-size.s (+48) 
- (modified) lld/wasm/Config.h (+1) 
- (modified) lld/wasm/Driver.cpp (+9-1) 
- (modified) lld/wasm/Options.td (+3) 
- (modified) lld/wasm/SymbolTable.cpp (+2-2) 
- (modified) lld/wasm/Symbols.cpp (+1) 
- (modified) lld/wasm/Symbols.h (+4) 
- (modified) lld/wasm/SyntheticSections.cpp (+4) 
- (modified) lld/wasm/Writer.cpp (+10-10) 
- (modified) llvm/include/llvm/BinaryFormat/Wasm.h (+4-2) 
- (modified) llvm/include/llvm/BinaryFormat/WasmTraits.h (+2-2) 
- (modified) llvm/include/llvm/MC/MCSymbolWasm.h (+1-1) 
- (modified) llvm/include/llvm/ObjectYAML/WasmYAML.h (+1) 
- (modified) llvm/lib/MC/WasmObjectWriter.cpp (+1-1) 
- (modified) llvm/lib/Object/WasmObjectFile.cpp (+6) 
- (modified) llvm/lib/ObjectYAML/WasmYAML.cpp (+2) 
- (modified) llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp (+1-1) 
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp (+1-1) 


``````````diff
diff --git a/lld/test/wasm/page-size.s b/lld/test/wasm/page-size.s
new file mode 100644
index 0000000000000..4ca89679705ab
--- /dev/null
+++ b/lld/test/wasm/page-size.s
@@ -0,0 +1,48 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o
+# RUN: wasm-ld -no-gc-sections -o %t.wasm %t.o --page-size=1
+# RUN: obj2yaml %t.wasm | FileCheck %s
+
+    .section .data.foo,"",@
+    .globl  foo
+    .hidden  foo
+    .p2align        2
+foo:
+    .int32  0xffffffff
+    .size   foo, 4
+
+    .section .text._start,"",@
+    .globl  _start
+_start:
+    .functype _start () -> (i32)
+    i32.const __wasm_first_page_end
+    end_function
+
+    .section .data.bar,"",@
+    .globl  bar
+    .hidden  bar
+    .p2align        2
+bar:
+    .int32  0x11111111
+    .size   bar, 4
+
+
+# CHECK:  - Type:            MEMORY
+# NEXTLN:   Memories:
+# NEXTLN:   - Flags:           [  ]
+# NEXTLN:     Minimum:         0x20000
+# NEXTLN:     PageSize:        0x1
+
+# CHECK:  - Type:            CODE
+# NEXTLN:   Functions:
+# NEXTLN:     - Index:           0
+# NEXTLN:       Locals:          []
+# NEXTLN:       Body:            4181808080000B
+
+# CHECK:  - Type:            DATA
+# NEXTLN:   Segments:
+# NEXTLN:     - SectionOffset:   7
+# NEXTLN:       InitFlags:       0
+# NEXTLN:       Offset:
+# NEXTLN:         Opcode:          I32_CONST
+# NEXTLN:         Value:           1024
+# NEXTLN:       Content:         FFFFFFFF11111111
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 1fa6c42d9cd86..649bd0a008b4b 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -94,6 +94,7 @@ struct Config {
   // runtime).
   uint64_t tableBase;
   uint64_t zStackSize;
+  uint64_t pageSize;
   unsigned ltoPartitions;
   unsigned ltoo;
   llvm::CodeGenOptLevel ltoCgo;
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index c3a74dde6480e..3a4ff0cac7215 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -642,7 +642,10 @@ static void readConfigs(opt::InputArgList &args) {
   ctx.arg.maxMemory = args::getInteger(args, OPT_max_memory, 0);
   ctx.arg.noGrowableMemory = args.hasArg(OPT_no_growable_memory);
   ctx.arg.zStackSize =
-      args::getZOptionValue(args, OPT_z, "stack-size", WasmPageSize);
+      args::getZOptionValue(args, OPT_z, "stack-size", WasmDefaultPageSize);
+  ctx.arg.pageSize = args::getInteger(args, OPT_page_size, WasmDefaultPageSize);
+  if (ctx.arg.pageSize != 1 && ctx.arg.pageSize != WasmDefaultPageSize)
+    error("--page_size=N must be either 1 or 65536");
 
   // -Bdynamic by default if -pie or -shared is specified.
   if (ctx.arg.pie || ctx.arg.shared)
@@ -999,6 +1002,11 @@ static void createOptionalSymbols() {
     WasmSym::definedTableBase = symtab->addOptionalDataSymbol("__table_base");
   }
 
+  WasmSym::firstPageEnd = symtab->addOptionalDataSymbol("__wasm_first_page_end");
+  if (WasmSym::firstPageEnd) {
+      WasmSym::firstPageEnd->setVA(ctx.arg.pageSize);
+  }
+
   // For non-shared memory programs we still need to define __tls_base since we
   // allow object files built with TLS to be linked into single threaded
   // programs, and such object files can contain references to this symbol.
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index 1316dc5c70d93..6ed572137a5d5 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -230,6 +230,9 @@ def import_table: FF<"import-table">,
 def initial_heap: JJ<"initial-heap=">,
   HelpText<"Initial size of the heap">;
 
+def page_size: JJ<"page-size=">,
+  HelpText<"The Wasm page size (Defaults to 65536)">;
+
 def initial_memory: JJ<"initial-memory=">,
   HelpText<"Initial size of the linear memory">;
 
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 7e8b4aa632a32..bbe48b03f77e5 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -792,7 +792,7 @@ Symbol *SymbolTable::addUndefinedTag(StringRef name,
 }
 
 TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
-  WasmLimits limits{0, 0, 0}; // Set by the writer.
+  WasmLimits limits{0, 0, 0, 0}; // Set by the writer.
   WasmTableType *type = make<WasmTableType>();
   type->ElemType = ValType::FUNCREF;
   type->Limits = limits;
@@ -807,7 +807,7 @@ TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
 
 TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) {
   const uint32_t invalidIndex = -1;
-  WasmLimits limits{0, 0, 0}; // Set by the writer.
+  WasmLimits limits{0, 0, 0, 0}; // Set by the writer.
   WasmTableType type{ValType::FUNCREF, limits};
   WasmTable desc{invalidIndex, type, name};
   InputTable *table = make<InputTable>(desc, nullptr);
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index a687fd6d6c4ef..ed4917488f84c 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -84,6 +84,7 @@ DefinedFunction *WasmSym::applyGlobalRelocs;
 DefinedFunction *WasmSym::applyTLSRelocs;
 DefinedFunction *WasmSym::applyGlobalTLSRelocs;
 DefinedFunction *WasmSym::initTLS;
+DefinedData *WasmSym::firstPageEnd;
 DefinedFunction *WasmSym::startFunction;
 DefinedData *WasmSym::dsoHandle;
 DefinedData *WasmSym::dataEnd;
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index b409fffc50a6c..03a74da7230d0 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -577,6 +577,10 @@ struct WasmSym {
   static DefinedData *heapBase;
   static DefinedData *heapEnd;
 
+  // __wasm_first_page_end
+  // A symbol whose address is the end of the first page in memory (if any).
+  static DefinedData *firstPageEnd;
+
   // __wasm_init_memory_flag
   // Symbol whose contents are nonzero iff memory has already been initialized.
   static DefinedData *initMemoryFlag;
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 7fb44b9f0c009..8a27502e9bfa6 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -360,10 +360,14 @@ void MemorySection::writeBody() {
     flags |= WASM_LIMITS_FLAG_IS_SHARED;
   if (ctx.arg.is64.value_or(false))
     flags |= WASM_LIMITS_FLAG_IS_64;
+  if (ctx.arg.pageSize != WasmDefaultPageSize)
+      flags |= WASM_LIMITS_FLAG_HAS_PAGE_SIZE;
   writeUleb128(os, flags, "memory limits flags");
   writeUleb128(os, numMemoryPages, "initial pages");
   if (hasMax)
     writeUleb128(os, maxMemoryPages, "max pages");
+  if (ctx.arg.pageSize != WasmDefaultPageSize)
+    writeUleb128(os, llvm::Log2_64(ctx.arg.pageSize), "page size");
 }
 
 void TagSection::writeBody() {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 76e38f548157c..2b871e548e926 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -445,8 +445,8 @@ void Writer::layoutMemory() {
   }
 
   if (ctx.arg.initialHeap != 0) {
-    if (ctx.arg.initialHeap != alignTo(ctx.arg.initialHeap, WasmPageSize))
-      error("initial heap must be " + Twine(WasmPageSize) + "-byte aligned");
+    if (ctx.arg.initialHeap != alignTo(ctx.arg.initialHeap, ctx.arg.pageSize))
+      error("initial heap must be aligned to the page size (" + Twine(ctx.arg.pageSize) + " bytes)");
     uint64_t maxInitialHeap = maxMemorySetting - memoryPtr;
     if (ctx.arg.initialHeap > maxInitialHeap)
       error("initial heap too large, cannot be greater than " +
@@ -455,8 +455,8 @@ void Writer::layoutMemory() {
   }
 
   if (ctx.arg.initialMemory != 0) {
-    if (ctx.arg.initialMemory != alignTo(ctx.arg.initialMemory, WasmPageSize))
-      error("initial memory must be " + Twine(WasmPageSize) + "-byte aligned");
+    if (ctx.arg.initialMemory != alignTo(ctx.arg.initialMemory, ctx.arg.pageSize))
+      error("initial memory must be aligned to the page size (" + Twine(ctx.arg.pageSize) + " bytes)");
     if (memoryPtr > ctx.arg.initialMemory)
       error("initial memory too small, " + Twine(memoryPtr) + " bytes needed");
     if (ctx.arg.initialMemory > maxMemorySetting)
@@ -465,9 +465,9 @@ void Writer::layoutMemory() {
     memoryPtr = ctx.arg.initialMemory;
   }
 
-  memoryPtr = alignTo(memoryPtr, WasmPageSize);
+  memoryPtr = alignTo(memoryPtr, ctx.arg.pageSize);
 
-  out.memorySec->numMemoryPages = memoryPtr / WasmPageSize;
+  out.memorySec->numMemoryPages = memoryPtr / ctx.arg.pageSize;
   log("mem: total pages = " + Twine(out.memorySec->numMemoryPages));
 
   if (WasmSym::heapEnd) {
@@ -480,8 +480,8 @@ void Writer::layoutMemory() {
 
   uint64_t maxMemory = 0;
   if (ctx.arg.maxMemory != 0) {
-    if (ctx.arg.maxMemory != alignTo(ctx.arg.maxMemory, WasmPageSize))
-      error("maximum memory must be " + Twine(WasmPageSize) + "-byte aligned");
+    if (ctx.arg.maxMemory != alignTo(ctx.arg.maxMemory, ctx.arg.pageSize))
+      error("maximum memory must be aligned to page size (" + Twine(ctx.arg.pageSize) + " bytes)");
     if (memoryPtr > ctx.arg.maxMemory)
       error("maximum memory too small, " + Twine(memoryPtr) + " bytes needed");
     if (ctx.arg.maxMemory > maxMemorySetting)
@@ -503,7 +503,7 @@ void Writer::layoutMemory() {
   }
 
   if (maxMemory != 0) {
-    out.memorySec->maxMemoryPages = maxMemory / WasmPageSize;
+    out.memorySec->maxMemoryPages = maxMemory / ctx.arg.pageSize;
     log("mem: max pages   = " + Twine(out.memorySec->maxMemoryPages));
   }
 }
@@ -932,7 +932,7 @@ static void finalizeIndirectFunctionTable() {
   }
 
   uint32_t tableSize = ctx.arg.tableBase + out.elemSec->numEntries();
-  WasmLimits limits = {0, tableSize, 0};
+  WasmLimits limits = {0, tableSize, 0, 0};
   if (WasmSym::indirectFunctionTable->isDefined() && !ctx.arg.growableTable) {
     limits.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
     limits.Maximum = limits.Minimum;
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index ede2d692a5949..ed2e5b2cccd42 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -28,8 +28,8 @@ const char WasmMagic[] = {'\0', 'a', 's', 'm'};
 const uint32_t WasmVersion = 0x1;
 // Wasm linking metadata version
 const uint32_t WasmMetadataVersion = 0x2;
-// Wasm uses a 64k page size
-const uint32_t WasmPageSize = 65536;
+// Wasm uses a 64k page size by default (but the custom-page-sizes proposal allows changing it)
+const uint32_t WasmDefaultPageSize = 65536;
 
 enum : unsigned {
   WASM_SEC_CUSTOM = 0,     // Custom / User-defined section
@@ -157,6 +157,7 @@ enum : unsigned {
   WASM_LIMITS_FLAG_HAS_MAX = 0x1,
   WASM_LIMITS_FLAG_IS_SHARED = 0x2,
   WASM_LIMITS_FLAG_IS_64 = 0x4,
+  WASM_LIMITS_FLAG_HAS_PAGE_SIZE = 0x8,
 };
 
 enum : unsigned {
@@ -317,6 +318,7 @@ struct WasmLimits {
   uint8_t Flags;
   uint64_t Minimum;
   uint64_t Maximum;
+  uint32_t PageSize;
 };
 
 struct WasmTableType {
diff --git a/llvm/include/llvm/BinaryFormat/WasmTraits.h b/llvm/include/llvm/BinaryFormat/WasmTraits.h
index 8167dab01c1af..dec50bbb05049 100644
--- a/llvm/include/llvm/BinaryFormat/WasmTraits.h
+++ b/llvm/include/llvm/BinaryFormat/WasmTraits.h
@@ -64,10 +64,10 @@ template <> struct DenseMapInfo<wasm::WasmGlobalType, void> {
 // Traits for using WasmLimits in a DenseMap
 template <> struct DenseMapInfo<wasm::WasmLimits, void> {
   static wasm::WasmLimits getEmptyKey() {
-    return wasm::WasmLimits{0xff, 0xff, 0xff};
+    return wasm::WasmLimits{0xff, 0xff, 0xff, 0xff};
   }
   static wasm::WasmLimits getTombstoneKey() {
-    return wasm::WasmLimits{0xee, 0xee, 0xee};
+    return wasm::WasmLimits{0xee, 0xee, 0xee, 0xee};
   }
   static unsigned getHashValue(const wasm::WasmLimits &Limits) {
     unsigned Hash = hash_value(Limits.Flags);
diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h
index e566ad83cd603..beb6b975a4cc3 100644
--- a/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -147,7 +147,7 @@ class MCSymbolWasm : public MCSymbol {
                     uint8_t flags = wasm::WASM_LIMITS_FLAG_NONE) {
     // Declare a table with element type VT and no limits (min size 0, no max
     // size).
-    wasm::WasmLimits Limits = {flags, 0, 0};
+    wasm::WasmLimits Limits = {flags, 0, 0, 0};
     setTableType({VT, Limits});
   }
 };
diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h
index 94ecc2fcfdb53..c638c5c958ed2 100644
--- a/llvm/include/llvm/ObjectYAML/WasmYAML.h
+++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h
@@ -48,6 +48,7 @@ struct Limits {
   LimitFlags Flags;
   yaml::Hex32 Minimum;
   yaml::Hex32 Maximum;
+  yaml::Hex32 PageSize;
 };
 
 struct Table {
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index c5a95cb3da543..ffee1ed8e5d59 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -845,7 +845,7 @@ void WasmObjectWriter::writeImportSection(ArrayRef<wasm::WasmImport> Imports,
   if (Imports.empty())
     return;
 
-  uint64_t NumPages = (DataSize + wasm::WasmPageSize - 1) / wasm::WasmPageSize;
+  uint64_t NumPages = (DataSize + wasm::WasmDefaultPageSize - 1) / wasm::WasmDefaultPageSize;
 
   SectionBookkeeping Section;
   startSection(Section, wasm::WASM_SEC_IMPORT);
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index 0f6fd5612f9d8..16b6852732ef7 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -291,6 +291,12 @@ static wasm::WasmLimits readLimits(WasmObjectFile::ReadContext &Ctx) {
   Result.Minimum = readVaruint64(Ctx);
   if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
     Result.Maximum = readVaruint64(Ctx);
+  if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_PAGE_SIZE) {
+    uint32_t PageSizeLog2 = readVaruint32(Ctx);
+    if (PageSizeLog2 >= 32)
+      report_fatal_error("log2(wasm page size) too large");
+    Result.PageSize = 1 << PageSizeLog2;
+  }
   return Result;
 }
 
diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp
index 6af66ba62be18..d89ef0b1e59b0 100644
--- a/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -372,6 +372,8 @@ void MappingTraits<WasmYAML::Limits>::mapping(IO &IO,
   IO.mapRequired("Minimum", Limits.Minimum);
   if (!IO.outputting() || Limits.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
     IO.mapOptional("Maximum", Limits.Maximum);
+  if (!IO.outputting() || Limits.Flags & wasm::WASM_LIMITS_FLAG_HAS_PAGE_SIZE)
+    IO.mapOptional("PageSize", Limits.PageSize);
 }
 
 void MappingTraits<WasmYAML::ElemSegment>::mapping(
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 2a4e2c897b18d..2e8fe18502aae 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -204,7 +204,7 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
 
 // Perhaps this should go somewhere common.
 static wasm::WasmLimits defaultLimits() {
-  return {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
+    return {wasm::WASM_LIMITS_FLAG_NONE, 0, 0, 0};
 }
 
 static MCSymbolWasm *getOrCreateFunctionTableSymbol(MCContext &Ctx,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 6cfc93ef1faae..747ef18df8d65 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -135,7 +135,7 @@ MCSymbolWasm *WebAssembly::getOrCreateFuncrefCallTableSymbol(
     // modules define the table.
     Sym->setWeak(true);
 
-    wasm::WasmLimits Limits = {0, 1, 1};
+    wasm::WasmLimits Limits = {0, 1, 1, 0};
     wasm::WasmTableType TableType = {wasm::ValType::FUNCREF, Limits};
     Sym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
     Sym->setTableType(TableType);

``````````

</details>


https://github.com/llvm/llvm-project/pull/128942


More information about the llvm-commits mailing list