[clang] [lld] [llvm] [WebAssembly] Cooperative threading for WASIP3 (PR #200855)
Sy Brand via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 8 01:41:09 PDT 2026
https://github.com/TartanLlama updated https://github.com/llvm/llvm-project/pull/200855
>From 83c5f9ab46715c25d95e0bfe6b6d5b0e1e2dd5b0 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Sun, 31 May 2026 17:23:22 +0100
Subject: [PATCH 01/15] Cooperative multithreading changes
---
clang/lib/Driver/ToolChains/WebAssembly.cpp | 8 +-
clang/test/Driver/wasm-toolchain.c | 7 ++
lld/test/wasm/cooperative-multithreading.s | 81 +++++++++++++++++++
lld/test/wasm/thread-context-abi-mismatch.s | 4 +-
lld/wasm/Config.h | 6 ++
lld/wasm/Driver.cpp | 11 ++-
lld/wasm/Options.td | 5 +-
lld/wasm/Relocations.cpp | 2 +-
lld/wasm/SyntheticSections.cpp | 20 ++---
lld/wasm/Writer.cpp | 45 +++++++----
.../WebAssembly/WebAssemblySubtarget.cpp | 7 +-
.../Target/WebAssembly/WebAssemblySubtarget.h | 4 +
.../WebAssembly/WebAssemblyTargetMachine.cpp | 11 ++-
.../WebAssembly/cooperative-strip-tls.ll | 20 +++++
.../WebAssembly/target-features-tls.ll | 1 +
15 files changed, 193 insertions(+), 39 deletions(-)
create mode 100644 lld/test/wasm/cooperative-multithreading.s
create mode 100644 llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index 4c1cd937e81aa..ce5463b167a58 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -88,8 +88,8 @@ static bool WantsPthread(const llvm::Triple &Triple, const ArgList &Args) {
return WantsPthread;
}
-static bool WantsLibcallThreadContext(const llvm::Triple &Triple,
- const ArgList &Args) {
+static bool WantsCooperativeMultithreading(const llvm::Triple &Triple,
+ const ArgList &Args) {
return Triple.getOS() == llvm::Triple::WASIp3;
}
@@ -174,8 +174,8 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
- if (WantsLibcallThreadContext(ToolChain.getTriple(), Args))
- CmdArgs.push_back("--libcall-thread-context");
+ if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args))
+ CmdArgs.push_back("--cooperative-multithreading");
if (WantsPthread(ToolChain.getTriple(), Args))
CmdArgs.push_back("--shared-memory");
diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c
index 29a94aeec77a9..40d75da3166d9 100644
--- a/clang/test/Driver/wasm-toolchain.c
+++ b/clang/test/Driver/wasm-toolchain.c
@@ -303,3 +303,10 @@
// RUN: | FileCheck -check-prefix=LINK_WALI_BASIC %s
// LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
// LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out"
+
+// Test that `wasm32-wasip3` passes `--cooperative-multithreading` to the linker.
+
+// RUN: %clang -### --target=wasm32-wasip3 -fuse-ld=lld %s --sysroot /foo 2>&1 \
+// RUN: | FileCheck -check-prefix=LINK_WASIP3_COOP %s
+// LINK_WASIP3_COOP: wasm-ld{{.*}}" {{.*}} "--cooperative-multithreading"
+// LINK_WASIP3_COOP-NOT: "--libcall-thread-context"
diff --git a/lld/test/wasm/cooperative-multithreading.s b/lld/test/wasm/cooperative-multithreading.s
new file mode 100644
index 0000000000000..cb41dd392d5e2
--- /dev/null
+++ b/lld/test/wasm/cooperative-multithreading.s
@@ -0,0 +1,81 @@
+# Test that --cooperative-multithreading uses the libcall ABI naming for
+# thread-context globals (__init_stack_pointer, __init_tls_base, etc.) and
+# works without --shared-memory and atomics.
+
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
+# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o
+# RUN: obj2yaml %t.wasm | FileCheck %s
+# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
+
+.globl __wasm_get_tls_base
+__wasm_get_tls_base:
+ .functype __wasm_get_tls_base () -> (i32)
+ i32.const 0
+ end_function
+
+.globl _start
+_start:
+ .functype _start () -> (i32)
+ call __wasm_get_tls_base
+ i32.const tls1@TLSREL
+ i32.add
+ i32.load 0
+ call __wasm_get_tls_base
+ i32.const tls2@TLSREL
+ i32.add
+ i32.load 0
+ i32.add
+ end_function
+
+.section .tdata.tls1,"",@
+.globl tls1
+tls1:
+ .int32 1
+ .size tls1, 4
+
+.section .tdata.tls2,"",@
+.globl tls2
+tls2:
+ .int32 2
+ .size tls2, 4
+
+.section .custom_section.target_features,"",@
+ .int8 2
+ .int8 43
+ .int8 11
+ .ascii "bulk-memory"
+ .int8 43
+ .int8 7
+ .ascii "atomics"
+
+# Memory must NOT be marked as shared.
+# CHECK: - Type: MEMORY
+# CHECK-NEXT: Memories:
+# CHECK-NEXT: - Minimum: 0x2
+# CHECK-NOT: IS_SHARED
+
+# Globals should use the libcall ABI naming, not the global ABI.
+# CHECK: GlobalNames:
+# CHECK-NEXT: - Index: 0
+# CHECK-NEXT: Name: __init_stack_pointer
+# CHECK-NEXT: - Index: 1
+# CHECK-NEXT: Name: __init_tls_base
+# CHECK-NEXT: - Index: 2
+# CHECK-NEXT: Name: __tls_size
+# CHECK-NEXT: - Index: 3
+# CHECK-NEXT: Name: __tls_align
+
+# DIS-LABEL: <__wasm_init_memory>:
+
+# DIS-LABEL: <_start>:
+# DIS-EMPTY:
+# DIS-NEXT: call {{[0-9]+}}
+# DIS-NEXT: i32.const 0
+# DIS-NEXT: i32.add
+# DIS-NEXT: i32.load 0
+# DIS-NEXT: call {{[0-9]+}}
+# DIS-NEXT: i32.const 4
+# DIS-NEXT: i32.add
+# DIS-NEXT: i32.load 0
+# DIS-NEXT: i32.add
+# DIS-NEXT: end
diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s
index 069534cbe5762..acab6fd59d9b7 100644
--- a/lld/test/wasm/thread-context-abi-mismatch.s
+++ b/lld/test/wasm/thread-context-abi-mismatch.s
@@ -4,9 +4,9 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
# RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s
+# RUN: not wasm-ld --cooperative-multithreading %t.o -o %t.wasm 2>&1 | FileCheck %s
-# CHECK: object file uses globals for thread context, but --libcall-thread-context was specified
-
+# CHECK: object file uses globals for thread context, but --libcall-thread-context or --cooperative-multithreading was specified
.globl _start
_start:
.functype _start () -> ()
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 71a378a412e9e..873d25d130424 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -46,6 +46,8 @@ enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic };
// For --build-id.
enum class BuildIdKind { None, Fast, Sha1, Hexstring, Uuid };
+enum class ThreadModel { Single, Cooperative, SharedMemory };
+
// This struct contains the global configuration for the linker.
// Most fields are direct mapping from the command line options
// and such fields have the same name as the corresponding options.
@@ -65,6 +67,7 @@ struct Config {
bool growableTable;
bool gcSections;
llvm::StringSet<> keepSections;
+ bool cooperativeMultithreading;
bool libcallThreadContext;
std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
std::optional<llvm::StringRef> memoryExport;
@@ -134,6 +137,9 @@ struct Config {
std::optional<std::vector<std::string>> features;
std::optional<std::vector<std::string>> extraFeatures;
llvm::SmallVector<uint8_t, 0> buildIdVector;
+
+ ThreadModel threadModel = ThreadModel::Single;
+ bool isMultithreaded() const { return threadModel != ThreadModel::Single; }
};
// The Ctx object hold all other (non-configuration) global state.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index fe1e2eec95037..20b398fc39a0c 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -561,6 +561,7 @@ static void readConfigs(opt::InputArgList &args) {
ctx.arg.soName = args.getLastArgValue(OPT_soname);
ctx.arg.importTable = args.hasArg(OPT_import_table);
ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
+ ctx.arg.cooperativeMultithreading = args.hasArg(OPT_cooperative_multithreading);
ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context);
ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
if (ctx.arg.ltoo > 3)
@@ -755,6 +756,12 @@ static void setConfigs() {
if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) {
ctx.arg.memoryExport = memoryName;
}
+
+ if (ctx.arg.cooperativeMultithreading) {
+ ctx.arg.threadModel = ThreadModel::Cooperative;
+ ctx.arg.libcallThreadContext = true;
+ } else if (ctx.arg.sharedMemory)
+ ctx.arg.threadModel = ThreadModel::SharedMemory;
}
// Some command line options or some combinations of them are not allowed.
@@ -964,7 +971,7 @@ static void createSyntheticSymbols() {
createGlobalVariable(stack_pointer_name, !ctx.arg.libcallThreadContext);
}
- if (ctx.arg.sharedMemory) {
+ if (ctx.arg.isMultithreaded()) {
// TLS symbols are all hidden/dso-local
auto tls_base_name =
ctx.arg.libcallThreadContext ? "__init_tls_base" : "__tls_base";
@@ -1028,7 +1035,7 @@ static void createOptionalSymbols() {
//
// __tls_size and __tls_align are not needed in this case since they are only
// needed for __wasm_init_tls (which we do not create in this case).
- if (!ctx.arg.sharedMemory)
+ if (!ctx.sym.tlsBase)
ctx.sym.tlsBase = createOptionalGlobal("__tls_base", false);
}
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index 144eee33061e1..8ad386ca0ce39 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -238,9 +238,12 @@ def page_size: JJ<"page-size=">,
def initial_memory: JJ<"initial-memory=">,
HelpText<"Initial size of the linear memory">;
+def cooperative_multithreading: FF<"cooperative-multithreading">,
+ HelpText<"Enable cooperative multithreading.">;
+
def libcall_thread_context: FF<"libcall-thread-context">,
HelpText<"Use library calls for thread context access instead of globals.">;
-
+
def max_memory: JJ<"max-memory=">,
HelpText<"Maximum size of the linear memory">;
diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp
index a1840abe88b3a..cb597fdeffcf3 100644
--- a/lld/wasm/Relocations.cpp
+++ b/lld/wasm/Relocations.cpp
@@ -125,7 +125,7 @@ void scanRelocations(InputChunk *chunk) {
// In single-threaded builds TLS is lowered away and TLS data can be
// merged with normal data and allowing TLS relocation in non-TLS
// segments.
- if (ctx.arg.sharedMemory) {
+ if (ctx.arg.isMultithreaded()) {
if (!sym->isTLS()) {
error(toString(file) + ": relocation " +
relocTypeToString(reloc.Type) +
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index d1a01c7ec3f9d..a465f2fb590b3 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -57,7 +57,7 @@ void writeGetTLSBase(const Ctx &ctx, raw_ostream &os) {
writeU8(os, WASM_OPCODE_CALL, "call");
writeUleb128(os, ctx.sym.getTLSBase->getFunctionIndex(), "function index");
} else {
- writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_SET");
+ writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base");
}
}
@@ -265,11 +265,11 @@ void ImportSection::writeBody() {
import.Kind = WASM_EXTERNAL_MEMORY;
import.Memory.Flags = 0;
import.Memory.Minimum = out.memorySec->numMemoryPages;
- if (out.memorySec->maxMemoryPages != 0 || ctx.arg.sharedMemory) {
+ if (out.memorySec->maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory) {
import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
import.Memory.Maximum = out.memorySec->maxMemoryPages;
}
- if (ctx.arg.sharedMemory)
+ if (ctx.arg.threadModel == ThreadModel::SharedMemory)
import.Memory.Flags |= WASM_LIMITS_FLAG_IS_SHARED;
if (is64)
import.Memory.Flags |= WASM_LIMITS_FLAG_IS_64;
@@ -406,12 +406,12 @@ void TableSection::assignIndexes() {
void MemorySection::writeBody() {
raw_ostream &os = bodyOutputStream;
- bool hasMax = maxMemoryPages != 0 || ctx.arg.sharedMemory;
+ bool hasMax = maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory;
writeUleb128(os, 1, "memory count");
unsigned flags = 0;
if (hasMax)
flags |= WASM_LIMITS_FLAG_HAS_MAX;
- if (ctx.arg.sharedMemory)
+ if (ctx.arg.threadModel == ThreadModel::SharedMemory)
flags |= WASM_LIMITS_FLAG_IS_SHARED;
if (ctx.arg.is64.value_or(false))
flags |= WASM_LIMITS_FLAG_IS_64;
@@ -532,7 +532,7 @@ void GlobalSection::writeBody() {
mutable_ = true;
// With multi-threading any TLS globals must be mutable since they get
// set during `__wasm_apply_global_tls_relocs`
- if (ctx.arg.sharedMemory && sym->isTLS())
+ if (ctx.arg.isMultithreaded() && sym->isTLS())
mutable_ = true;
}
WasmGlobalType type{itype, mutable_};
@@ -569,10 +569,10 @@ void GlobalSection::writeBody() {
} else {
WasmInitExpr initExpr;
if (auto *d = dyn_cast<DefinedData>(sym))
- // In the sharedMemory case TLS globals are set during
- // `__wasm_apply_global_tls_relocs`, but in the non-shared case
+ // In the multithreaded case, TLS globals are set during
+ // `__wasm_apply_global_tls_relocs`, but in the single-threaded case
// we know the absolute value at link time.
- initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.sharedMemory), is64);
+ initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64);
else if (auto *f = dyn_cast<FunctionSymbol>(sym))
initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64);
else {
@@ -680,7 +680,7 @@ bool DataCountSection::isNeeded() const {
// instructions are not yet supported in input files. However, in the case
// of shared memory, lld itself will generate these instructions as part of
// `__wasm_init_memory`. See Writer::createInitMemoryFunction.
- return numSegments && ctx.arg.sharedMemory;
+ return numSegments && ctx.arg.isMultithreaded();
}
void LinkingSection::writeBody() {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 688bb829e1c42..79e3c46410e8d 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -425,13 +425,13 @@ void Writer::layoutMemory() {
// Even in the absense of any actual TLS data, this symbol can still be
// referenced (for example by __builtin_thread_pointer, which should not
// return NULL).
- if (!ctx.arg.sharedMemory && ctx.sym.tlsBase) {
+ if (!ctx.arg.isMultithreaded() && ctx.sym.tlsBase) {
auto *tlsBase = cast<DefinedGlobal>(ctx.sym.tlsBase);
setGlobalPtr(tlsBase, fixedTLSBase);
}
// Make space for the memory initialization flag
- if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) {
+ if (ctx.arg.threadModel == ThreadModel::SharedMemory && hasPassiveInitializedSegments()) {
memoryPtr = alignTo(memoryPtr, 4);
ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol(
"__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN);
@@ -519,7 +519,7 @@ void Writer::layoutMemory() {
// If no maxMemory config was supplied but we are building with
// shared memory, we need to pick a sensible upper limit.
- if (ctx.arg.sharedMemory && maxMemory == 0) {
+ if (ctx.arg.threadModel == ThreadModel::SharedMemory && maxMemory == 0) {
if (ctx.isPic)
maxMemory = maxMemorySetting;
else
@@ -1057,7 +1057,15 @@ static StringRef getOutputDataSegmentName(const InputChunk &seg) {
OutputSegment *Writer::createOutputSegment(StringRef name) {
LLVM_DEBUG(dbgs() << "new segment: " << name << "\n");
OutputSegment *s = make<OutputSegment>(name);
- if (ctx.arg.sharedMemory)
+ // In the shared memory case, all data segments must be passive since they
+ // will be initialized once by the main thread and then shared with other
+ // threads. In the cooperative case, we use passive segments only for
+ // TLS segments, so that they can be reused, and for .bss segments, which
+ // don't need to be included in the binary at all.
+ bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory ||
+ (ctx.arg.threadModel == ThreadModel::Cooperative &&
+ (s->isTLS() || s->name.starts_with(".bss")));
+ if (needsPassiveInit)
s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
if (!ctx.arg.relocatable && name.starts_with(".bss"))
s->isBss = true;
@@ -1198,7 +1206,7 @@ void Writer::createSyntheticInitFunctions() {
}
}
- if (ctx.arg.sharedMemory) {
+ if (ctx.arg.isMultithreaded()) {
if (out.globalSec->needsTLSRelocations()) {
ctx.sym.applyGlobalTLSRelocs = symtab->addSyntheticFunction(
"__wasm_apply_global_tls_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN,
@@ -1247,7 +1255,7 @@ void Writer::createInitMemoryFunction() {
assert(ctx.sym.initMemory);
assert(hasPassiveInitializedSegments());
uint64_t flagAddress;
- if (ctx.arg.sharedMemory) {
+ if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
assert(ctx.sym.initMemoryFlag);
flagAddress = ctx.sym.initMemoryFlag->getVA();
}
@@ -1315,7 +1323,7 @@ void Writer::createInitMemoryFunction() {
}
};
- if (ctx.arg.sharedMemory) {
+ if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
// With PIC code we cache the flag address in local 0
if (ctx.isPic) {
writeUleb128(os, 1, "num local decls");
@@ -1378,7 +1386,7 @@ void Writer::createInitMemoryFunction() {
// When we initialize the TLS segment we also set the TLS base.
// This allows the runtime to use this static copy of the TLS data
// for the first/main thread.
- if (ctx.arg.sharedMemory && s->isTLS()) {
+ if (ctx.arg.isMultithreaded() && s->isTLS()) {
if (ctx.isPic) {
// Cache the result of the addionion in local 0
writeU8(os, WASM_OPCODE_LOCAL_TEE, "local.tee");
@@ -1410,7 +1418,7 @@ void Writer::createInitMemoryFunction() {
}
}
- if (ctx.arg.sharedMemory) {
+ if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
// Set flag to 2 to mark end of initialization
writeGetFlagAddress();
writeI32Const(os, 2, "flag value");
@@ -1449,7 +1457,7 @@ void Writer::createInitMemoryFunction() {
if (needsPassiveInitialization(s) && !s->isBss) {
// The TLS region should not be dropped since its is needed
// during the initialization of each thread (__wasm_init_tls).
- if (ctx.arg.sharedMemory && s->isTLS())
+ if (ctx.arg.isMultithreaded() && s->isTLS())
continue;
// data.drop instruction
writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
@@ -1502,7 +1510,7 @@ void Writer::createApplyDataRelocationsFunction() {
writeUleb128(os, 0, "num locals");
bool generated = false;
for (const OutputSegment *seg : segments)
- if (!ctx.arg.sharedMemory || !seg->isTLS())
+ if (!ctx.arg.isMultithreaded() || !seg->isTLS())
for (const InputChunk *inSeg : seg->inputSegments)
generated |= inSeg->generateRelocationCode(os);
@@ -1656,10 +1664,17 @@ void Writer::createInitTLSFunction() {
writeUleb128(os, 0, "num locals");
if (tlsSeg) {
- writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
- writeUleb128(os, 0, "local index");
-
writeSetTLSBase(ctx, os);
+ /*
+ // In cooperative threading mode the runtime is responsible for calling
+ // __wasm_set_tls_base separately; __wasm_init_tls only copies the TLS
+ // template data.
+ if (!ctx.arg.libcallThreadContext) {
+ writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
+ writeUleb128(os, 0, "local index");
+ writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set");
+ writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index");
+ }*/
// FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend
// op.
@@ -1791,7 +1806,7 @@ void Writer::run() {
// `__memory_base` import. Unless we support the extended const expression we
// can't do addition inside the constant expression, so we much combine the
// segments into a single one that can live at `__memory_base`.
- if (ctx.isPic && !ctx.arg.extendedConst && !ctx.arg.sharedMemory) {
+ if (ctx.isPic && !ctx.arg.extendedConst && ctx.arg.threadModel != ThreadModel::SharedMemory) {
// In shared memory mode all data segments are passive and initialized
// via __wasm_init_memory.
log("-- combineOutputSegments");
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 6326b7d76db82..9dea29fb0205d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -40,9 +40,12 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU,
ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
- // WASIP3 implies using the libcall thread context.
- if (TargetTriple.getOS() == Triple::WASIp3)
+ // WASIP3 uses cooperative multithreading, which implies using libcall
+ // thread context.
+ if (TargetTriple.getOS() == Triple::WASIp3) {
+ HasCooperativeMultithreading = true;
HasLibcallThreadContext = true;
+ }
FeatureBitset Bits = getFeatureBits();
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 5c6f4cb5b36ff..f637ce59ebfce 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -52,6 +52,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
bool HasExtendedConst = false;
bool HasFP16 = false;
bool HasGC = false;
+ bool HasCooperativeMultithreading = false;
bool HasLibcallThreadContext = false;
bool HasMultiMemory = false;
bool HasMultivalue = false;
@@ -117,6 +118,9 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
bool hasExtendedConst() const { return HasExtendedConst; }
bool hasFP16() const { return HasFP16; }
bool hasGC() const { return HasGC; }
+ bool hasCooperativeMultithreading() const {
+ return HasCooperativeMultithreading;
+ }
bool hasLibcallThreadContext() const { return HasLibcallThreadContext; }
bool hasMultiMemory() const { return HasMultiMemory; }
bool hasMultivalue() const { return HasMultivalue; }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 1361dd99b7072..ee15c9093ff3b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -283,10 +283,17 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass {
bool StrippedAtomics = false;
bool StrippedTLS = false;
+ // In cooperative threading mode, thread locals are meaningful even without
+ // atomics.
+ bool CooperativeThreading =
+ WasmTM->getSubtargetImpl()->hasCooperativeMultithreading();
+
if (!Features[WebAssembly::FeatureAtomics]) {
StrippedAtomics = stripAtomics(M);
- StrippedTLS = stripThreadLocals(M);
- } else if (!Features[WebAssembly::FeatureBulkMemory]) {
+ if (!CooperativeThreading)
+ StrippedTLS = stripThreadLocals(M);
+ }
+ if (!Features[WebAssembly::FeatureBulkMemory]) {
StrippedTLS |= stripThreadLocals(M);
}
diff --git a/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll
new file mode 100644
index 0000000000000..46ac1cd0509b7
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll
@@ -0,0 +1,20 @@
+; Test that in cooperative threading mode (wasm32-wasip3), thread-local variables
+; are NOT stripped even when atomics are absent. In non-cooperative mode
+; (wasm32-unknown-unknown) TLS is stripped to .bss when atomics are absent.
+
+; RUN: llc < %s -mtriple=wasm32-wasip3 -mcpu=mvp -mattr=-atomics,+bulk-memory \
+; RUN: | FileCheck %s --check-prefixes=COOP
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=mvp -mattr=-atomics,+bulk-memory \
+; RUN: | FileCheck %s --check-prefixes=PLAIN
+
+target triple = "wasm32-unknown-unknown"
+
+@foo = internal thread_local global i32 0
+
+; Cooperative threading: TLS is preserved — the section stays .tbss.
+; COOP: .tbss.foo
+; COOP-NOT: .bss.foo
+
+; Non-cooperative: TLS stripped
+; PLAIN: .bss.foo
+; PLAIN-NOT: .tbss.foo
diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
index 4abe01a73aeee..92333f3c7b9f1 100644
--- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -32,3 +32,4 @@ target triple = "wasm32-unknown-unknown"
; BULK-MEM-NEXT: .int8 15
; BULK-MEM-NEXT: .ascii "bulk-memory-opt"
; BULK-MEM-NEXT: .tbss.foo,"T",@
+
>From 8fea1e2a54c8055132798269ca8b619729ea9db2 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 1 Jun 2026 16:17:23 +0100
Subject: [PATCH 02/15] Cleanup createInitTLSFunction
---
lld/wasm/Writer.cpp | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 79e3c46410e8d..aa6b84c6f925f 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -1664,17 +1664,9 @@ void Writer::createInitTLSFunction() {
writeUleb128(os, 0, "num locals");
if (tlsSeg) {
+ writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
+ writeUleb128(os, 0, "local index");
writeSetTLSBase(ctx, os);
- /*
- // In cooperative threading mode the runtime is responsible for calling
- // __wasm_set_tls_base separately; __wasm_init_tls only copies the TLS
- // template data.
- if (!ctx.arg.libcallThreadContext) {
- writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
- writeUleb128(os, 0, "local index");
- writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set");
- writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index");
- }*/
// FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend
// op.
>From ca1b2937d96037de1007677ca739a09162cae96f Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 1 Jun 2026 16:19:54 +0100
Subject: [PATCH 03/15] Remove newline
---
llvm/test/CodeGen/WebAssembly/target-features-tls.ll | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
index 92333f3c7b9f1..4abe01a73aeee 100644
--- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -32,4 +32,3 @@ target triple = "wasm32-unknown-unknown"
; BULK-MEM-NEXT: .int8 15
; BULK-MEM-NEXT: .ascii "bulk-memory-opt"
; BULK-MEM-NEXT: .tbss.foo,"T",@
-
>From e76828df6b96738c4b49708ac7888a63faa17d3b Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 1 Jun 2026 16:35:47 +0100
Subject: [PATCH 04/15] Replace libcall-thread-context flag
---
lld/test/wasm/stack-pointer-abi.s | 2 +-
lld/test/wasm/thread-context-abi-mismatch.s | 3 +--
lld/test/wasm/tls-libcall.s | 2 +-
lld/wasm/Driver.cpp | 1 -
lld/wasm/Options.td | 3 ---
lld/wasm/Writer.cpp | 2 +-
6 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s
index 869f972710991..fbae0475bcba2 100644
--- a/lld/test/wasm/stack-pointer-abi.s
+++ b/lld/test/wasm/stack-pointer-abi.s
@@ -1,5 +1,5 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --libcall-thread-context --no-gc-sections -o %t.libcall.wasm %t.o
+# RUN: wasm-ld --cooperative-multithreading --no-gc-sections -o %t.libcall.wasm %t.o
# RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL
# RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o
# RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL
diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s
index acab6fd59d9b7..a817ca4407aab 100644
--- a/lld/test/wasm/thread-context-abi-mismatch.s
+++ b/lld/test/wasm/thread-context-abi-mismatch.s
@@ -3,10 +3,9 @@
# as an indication that the global thread context ABI is being used.
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s
# RUN: not wasm-ld --cooperative-multithreading %t.o -o %t.wasm 2>&1 | FileCheck %s
-# CHECK: object file uses globals for thread context, but --libcall-thread-context or --cooperative-multithreading was specified
+# CHECK: object file uses globals for thread context, but --cooperative-multithreading was specified
.globl _start
_start:
.functype _start () -> ()
diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s
index df8b8f8be0207..a0a7f37379bac 100644
--- a/lld/test/wasm/tls-libcall.s
+++ b/lld/test/wasm/tls-libcall.s
@@ -1,5 +1,5 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --libcall-thread-context --shared-memory -no-gc-sections -o %t.wasm %t.o
+# RUN: wasm-ld --cooperative-multithreading --shared-memory -no-gc-sections -o %t.wasm %t.o
# RUN: obj2yaml %t.wasm | FileCheck %s
# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 20b398fc39a0c..1ef4f55becc50 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -562,7 +562,6 @@ static void readConfigs(opt::InputArgList &args) {
ctx.arg.importTable = args.hasArg(OPT_import_table);
ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
ctx.arg.cooperativeMultithreading = args.hasArg(OPT_cooperative_multithreading);
- ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context);
ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
if (ctx.arg.ltoo > 3)
error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index 8ad386ca0ce39..bd2a7a19e0887 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -241,9 +241,6 @@ def initial_memory: JJ<"initial-memory=">,
def cooperative_multithreading: FF<"cooperative-multithreading">,
HelpText<"Enable cooperative multithreading.">;
-def libcall_thread_context: FF<"libcall-thread-context">,
- HelpText<"Use library calls for thread context access instead of globals.">;
-
def max_memory: JJ<"max-memory=">,
HelpText<"Maximum size of the linear memory">;
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index aa6b84c6f925f..2128c1b213e5c 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -653,7 +653,7 @@ void Writer::populateTargetFeatures() {
sym->importModule && sym->importModule == "env";
}))
error(fileName + ": object file uses globals for thread context, "
- "but --libcall-thread-context was specified");
+ "but --cooperative-multithreading was specified");
}
if (inferFeatures)
>From 77c1c717e48adbe1f5800b0268a586e658dd6b5c Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 1 Jun 2026 16:36:36 +0100
Subject: [PATCH 05/15] fmt
---
lld/wasm/Driver.cpp | 3 ++-
lld/wasm/SyntheticSections.cpp | 9 ++++++---
lld/wasm/Writer.cpp | 10 ++++++----
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 1ef4f55becc50..17781995815f5 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -561,7 +561,8 @@ static void readConfigs(opt::InputArgList &args) {
ctx.arg.soName = args.getLastArgValue(OPT_soname);
ctx.arg.importTable = args.hasArg(OPT_import_table);
ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
- ctx.arg.cooperativeMultithreading = args.hasArg(OPT_cooperative_multithreading);
+ ctx.arg.cooperativeMultithreading =
+ args.hasArg(OPT_cooperative_multithreading);
ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
if (ctx.arg.ltoo > 3)
error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index a465f2fb590b3..6c7d46787d661 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -265,7 +265,8 @@ void ImportSection::writeBody() {
import.Kind = WASM_EXTERNAL_MEMORY;
import.Memory.Flags = 0;
import.Memory.Minimum = out.memorySec->numMemoryPages;
- if (out.memorySec->maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory) {
+ if (out.memorySec->maxMemoryPages != 0 ||
+ ctx.arg.threadModel == ThreadModel::SharedMemory) {
import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
import.Memory.Maximum = out.memorySec->maxMemoryPages;
}
@@ -406,7 +407,8 @@ void TableSection::assignIndexes() {
void MemorySection::writeBody() {
raw_ostream &os = bodyOutputStream;
- bool hasMax = maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory;
+ bool hasMax =
+ maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory;
writeUleb128(os, 1, "memory count");
unsigned flags = 0;
if (hasMax)
@@ -572,7 +574,8 @@ void GlobalSection::writeBody() {
// In the multithreaded case, TLS globals are set during
// `__wasm_apply_global_tls_relocs`, but in the single-threaded case
// we know the absolute value at link time.
- initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64);
+ initExpr =
+ intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64);
else if (auto *f = dyn_cast<FunctionSymbol>(sym))
initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64);
else {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 2128c1b213e5c..cf99208456a11 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -431,7 +431,8 @@ void Writer::layoutMemory() {
}
// Make space for the memory initialization flag
- if (ctx.arg.threadModel == ThreadModel::SharedMemory && hasPassiveInitializedSegments()) {
+ if (ctx.arg.threadModel == ThreadModel::SharedMemory &&
+ hasPassiveInitializedSegments()) {
memoryPtr = alignTo(memoryPtr, 4);
ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol(
"__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN);
@@ -1063,8 +1064,8 @@ OutputSegment *Writer::createOutputSegment(StringRef name) {
// TLS segments, so that they can be reused, and for .bss segments, which
// don't need to be included in the binary at all.
bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory ||
- (ctx.arg.threadModel == ThreadModel::Cooperative &&
- (s->isTLS() || s->name.starts_with(".bss")));
+ (ctx.arg.threadModel == ThreadModel::Cooperative &&
+ (s->isTLS() || s->name.starts_with(".bss")));
if (needsPassiveInit)
s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
if (!ctx.arg.relocatable && name.starts_with(".bss"))
@@ -1798,7 +1799,8 @@ void Writer::run() {
// `__memory_base` import. Unless we support the extended const expression we
// can't do addition inside the constant expression, so we much combine the
// segments into a single one that can live at `__memory_base`.
- if (ctx.isPic && !ctx.arg.extendedConst && ctx.arg.threadModel != ThreadModel::SharedMemory) {
+ if (ctx.isPic && !ctx.arg.extendedConst &&
+ ctx.arg.threadModel != ThreadModel::SharedMemory) {
// In shared memory mode all data segments are passive and initialized
// via __wasm_init_memory.
log("-- combineOutputSegments");
>From 763392d058e4298f0bee6e66f17adb703b552a66 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 1 Jun 2026 16:43:01 +0100
Subject: [PATCH 06/15] Correct output segments
---
lld/wasm/Writer.cpp | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index cf99208456a11..d60dfcdaf43a6 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -1125,7 +1125,7 @@ void Writer::combineOutputSegments() {
// This restriction does not apply when the extended const extension is
// available: https://github.com/WebAssembly/extended-const
assert(!ctx.arg.extendedConst);
- assert(ctx.isPic && !ctx.arg.sharedMemory);
+ assert(ctx.isPic && !ctx.arg.isMultithreaded());
if (segments.size() <= 1)
return;
OutputSegment *combined = make<OutputSegment>(".data");
@@ -1799,10 +1799,9 @@ void Writer::run() {
// `__memory_base` import. Unless we support the extended const expression we
// can't do addition inside the constant expression, so we much combine the
// segments into a single one that can live at `__memory_base`.
- if (ctx.isPic && !ctx.arg.extendedConst &&
- ctx.arg.threadModel != ThreadModel::SharedMemory) {
- // In shared memory mode all data segments are passive and initialized
- // via __wasm_init_memory.
+ if (ctx.isPic && !ctx.arg.extendedConst && !ctx.arg.isMultithreaded()) {
+ // In multithreaded modes (shared or cooperative), data segments may be
+ // passive and must not be combined into a single active segment.
log("-- combineOutputSegments");
combineOutputSegments();
}
>From c68d4de08b8ac66b38a6d24adad0c26040aaa134 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 1 Jun 2026 16:53:02 +0100
Subject: [PATCH 07/15] Cleanup
---
lld/wasm/Config.h | 6 +-----
lld/wasm/Driver.cpp | 5 -----
lld/wasm/SyntheticSections.cpp | 8 ++++----
lld/wasm/Writer.cpp | 14 +++++++-------
4 files changed, 12 insertions(+), 21 deletions(-)
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 873d25d130424..af74f0f40bbdf 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -46,8 +46,6 @@ enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic };
// For --build-id.
enum class BuildIdKind { None, Fast, Sha1, Hexstring, Uuid };
-enum class ThreadModel { Single, Cooperative, SharedMemory };
-
// This struct contains the global configuration for the linker.
// Most fields are direct mapping from the command line options
// and such fields have the same name as the corresponding options.
@@ -68,7 +66,6 @@ struct Config {
bool gcSections;
llvm::StringSet<> keepSections;
bool cooperativeMultithreading;
- bool libcallThreadContext;
std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
std::optional<llvm::StringRef> memoryExport;
bool sharedMemory;
@@ -138,8 +135,7 @@ struct Config {
std::optional<std::vector<std::string>> extraFeatures;
llvm::SmallVector<uint8_t, 0> buildIdVector;
- ThreadModel threadModel = ThreadModel::Single;
- bool isMultithreaded() const { return threadModel != ThreadModel::Single; }
+ bool isMultithreaded() const { return sharedMemory || cooperativeMultithreading; }
};
// The Ctx object hold all other (non-configuration) global state.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 17781995815f5..b06e0fbb55eec 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -757,11 +757,6 @@ static void setConfigs() {
ctx.arg.memoryExport = memoryName;
}
- if (ctx.arg.cooperativeMultithreading) {
- ctx.arg.threadModel = ThreadModel::Cooperative;
- ctx.arg.libcallThreadContext = true;
- } else if (ctx.arg.sharedMemory)
- ctx.arg.threadModel = ThreadModel::SharedMemory;
}
// Some command line options or some combinations of them are not allowed.
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 6c7d46787d661..753a1c7fe5c82 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -266,11 +266,11 @@ void ImportSection::writeBody() {
import.Memory.Flags = 0;
import.Memory.Minimum = out.memorySec->numMemoryPages;
if (out.memorySec->maxMemoryPages != 0 ||
- ctx.arg.threadModel == ThreadModel::SharedMemory) {
+ ctx.arg.sharedMemory) {
import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
import.Memory.Maximum = out.memorySec->maxMemoryPages;
}
- if (ctx.arg.threadModel == ThreadModel::SharedMemory)
+ if (ctx.arg.sharedMemory)
import.Memory.Flags |= WASM_LIMITS_FLAG_IS_SHARED;
if (is64)
import.Memory.Flags |= WASM_LIMITS_FLAG_IS_64;
@@ -408,12 +408,12 @@ void MemorySection::writeBody() {
raw_ostream &os = bodyOutputStream;
bool hasMax =
- maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory;
+ maxMemoryPages != 0 || ctx.arg.sharedMemory;
writeUleb128(os, 1, "memory count");
unsigned flags = 0;
if (hasMax)
flags |= WASM_LIMITS_FLAG_HAS_MAX;
- if (ctx.arg.threadModel == ThreadModel::SharedMemory)
+ if (ctx.arg.sharedMemory)
flags |= WASM_LIMITS_FLAG_IS_SHARED;
if (ctx.arg.is64.value_or(false))
flags |= WASM_LIMITS_FLAG_IS_64;
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index d60dfcdaf43a6..48145d005c117 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -431,7 +431,7 @@ void Writer::layoutMemory() {
}
// Make space for the memory initialization flag
- if (ctx.arg.threadModel == ThreadModel::SharedMemory &&
+ if (ctx.arg.sharedMemory &&
hasPassiveInitializedSegments()) {
memoryPtr = alignTo(memoryPtr, 4);
ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol(
@@ -520,7 +520,7 @@ void Writer::layoutMemory() {
// If no maxMemory config was supplied but we are building with
// shared memory, we need to pick a sensible upper limit.
- if (ctx.arg.threadModel == ThreadModel::SharedMemory && maxMemory == 0) {
+ if (ctx.arg.sharedMemory && maxMemory == 0) {
if (ctx.isPic)
maxMemory = maxMemorySetting;
else
@@ -1063,8 +1063,8 @@ OutputSegment *Writer::createOutputSegment(StringRef name) {
// threads. In the non-shared memory case, we use passive segments only for
// TLS segments, so that they can be reused, and for .bss segments, which
// don't need to be included in the binary at all.
- bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory ||
- (ctx.arg.threadModel == ThreadModel::Cooperative &&
+ bool needsPassiveInit = ctx.arg.sharedMemory ||
+ (ctx.arg.cooperativeMultithreading &&
(s->isTLS() || s->name.starts_with(".bss")));
if (needsPassiveInit)
s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
@@ -1256,7 +1256,7 @@ void Writer::createInitMemoryFunction() {
assert(ctx.sym.initMemory);
assert(hasPassiveInitializedSegments());
uint64_t flagAddress;
- if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
+ if (ctx.arg.sharedMemory) {
assert(ctx.sym.initMemoryFlag);
flagAddress = ctx.sym.initMemoryFlag->getVA();
}
@@ -1324,7 +1324,7 @@ void Writer::createInitMemoryFunction() {
}
};
- if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
+ if (ctx.arg.sharedMemory) {
// With PIC code we cache the flag address in local 0
if (ctx.isPic) {
writeUleb128(os, 1, "num local decls");
@@ -1419,7 +1419,7 @@ void Writer::createInitMemoryFunction() {
}
}
- if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
+ if (ctx.arg.sharedMemory) {
// Set flag to 2 to mark end of initialization
writeGetFlagAddress();
writeI32Const(os, 2, "flag value");
>From c1a15696ba1d5de74ca5c0404b4405cc2f148a4e Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 1 Jun 2026 17:07:29 +0100
Subject: [PATCH 08/15] Cleanup options
---
clang/lib/Driver/ToolChains/WebAssembly.cpp | 6 +++++-
lld/test/wasm/cooperative-multithreading.s | 4 ++++
lld/wasm/Config.h | 1 +
lld/wasm/Driver.cpp | 8 ++++++--
lld/wasm/Options.td | 2 +-
5 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index ce5463b167a58..d1e1766a0dee3 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -93,6 +93,10 @@ static bool WantsCooperativeMultithreading(const llvm::Triple &Triple,
return Triple.getOS() == llvm::Triple::WASIp3;
}
+static bool WantsSharedMemory(const llvm::Triple &Triple, const ArgList &Args) {
+ return WantsPthread(Triple, Args) && !WantsCooperativeMultithreading(Triple, Args);
+}
+
void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
@@ -177,7 +181,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args))
CmdArgs.push_back("--cooperative-multithreading");
- if (WantsPthread(ToolChain.getTriple(), Args))
+ if (WantsSharedMemory(ToolChain.getTriple(), Args))
CmdArgs.push_back("--shared-memory");
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
diff --git a/lld/test/wasm/cooperative-multithreading.s b/lld/test/wasm/cooperative-multithreading.s
index cb41dd392d5e2..5adfaa99e40a0 100644
--- a/lld/test/wasm/cooperative-multithreading.s
+++ b/lld/test/wasm/cooperative-multithreading.s
@@ -7,6 +7,10 @@
# RUN: obj2yaml %t.wasm | FileCheck %s
# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
+# Test that --cooperative-multithreading and --shared-memory are mutually exclusive.
+# RUN: not wasm-ld --cooperative-multithreading --shared-memory %t.o -o %t2.wasm 2>&1 | FileCheck %s --check-prefix=INCOMPAT
+# INCOMPAT: --cooperative-multithreading is incompatible with --shared-memory
+
.globl __wasm_get_tls_base
__wasm_get_tls_base:
.functype __wasm_get_tls_base () -> (i32)
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index af74f0f40bbdf..60b04ad5abc87 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -66,6 +66,7 @@ struct Config {
bool gcSections;
llvm::StringSet<> keepSections;
bool cooperativeMultithreading;
+ bool libcallThreadContext;
std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
std::optional<llvm::StringRef> memoryExport;
bool sharedMemory;
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index b06e0fbb55eec..605aa5dcebe94 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -556,13 +556,12 @@ static void readConfigs(opt::InputArgList &args) {
} else if (args.hasArg(OPT_export_memory)) {
ctx.arg.memoryExport = memoryName;
}
-
ctx.arg.sharedMemory = args.hasArg(OPT_shared_memory);
ctx.arg.soName = args.getLastArgValue(OPT_soname);
ctx.arg.importTable = args.hasArg(OPT_import_table);
ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
ctx.arg.cooperativeMultithreading =
- args.hasArg(OPT_cooperative_multithreading);
+ args.hasArg(OPT_cooperative_multithreading);;
ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
if (ctx.arg.ltoo > 3)
error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
@@ -757,6 +756,11 @@ static void setConfigs() {
ctx.arg.memoryExport = memoryName;
}
+ if (ctx.arg.cooperativeMultithreading) {
+ if (ctx.arg.sharedMemory)
+ error("--cooperative-multithreading is incompatible with --shared-memory");
+ ctx.arg.libcallThreadContext = true;
+ }
}
// Some command line options or some combinations of them are not allowed.
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index bd2a7a19e0887..6d18a0400ef97 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -240,7 +240,7 @@ def initial_memory: JJ<"initial-memory=">,
def cooperative_multithreading: FF<"cooperative-multithreading">,
HelpText<"Enable cooperative multithreading.">;
-
+
def max_memory: JJ<"max-memory=">,
HelpText<"Maximum size of the linear memory">;
>From 843491367a07d034b255ba630e6b00e17c58bca5 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 1 Jun 2026 17:10:42 +0100
Subject: [PATCH 09/15] fmt
---
clang/lib/Driver/ToolChains/WebAssembly.cpp | 3 ++-
lld/wasm/Config.h | 4 +++-
lld/wasm/Driver.cpp | 8 +++++---
lld/wasm/SyntheticSections.cpp | 6 ++----
lld/wasm/Writer.cpp | 9 ++++-----
5 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index d1e1766a0dee3..d8f23175eb58b 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -94,7 +94,8 @@ static bool WantsCooperativeMultithreading(const llvm::Triple &Triple,
}
static bool WantsSharedMemory(const llvm::Triple &Triple, const ArgList &Args) {
- return WantsPthread(Triple, Args) && !WantsCooperativeMultithreading(Triple, Args);
+ return WantsPthread(Triple, Args) &&
+ !WantsCooperativeMultithreading(Triple, Args);
}
void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 60b04ad5abc87..d4789b88203eb 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -136,7 +136,9 @@ struct Config {
std::optional<std::vector<std::string>> extraFeatures;
llvm::SmallVector<uint8_t, 0> buildIdVector;
- bool isMultithreaded() const { return sharedMemory || cooperativeMultithreading; }
+ bool isMultithreaded() const {
+ return sharedMemory || cooperativeMultithreading;
+ }
};
// The Ctx object hold all other (non-configuration) global state.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 605aa5dcebe94..b2723220b5afb 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -556,12 +556,14 @@ static void readConfigs(opt::InputArgList &args) {
} else if (args.hasArg(OPT_export_memory)) {
ctx.arg.memoryExport = memoryName;
}
+
ctx.arg.sharedMemory = args.hasArg(OPT_shared_memory);
ctx.arg.soName = args.getLastArgValue(OPT_soname);
ctx.arg.importTable = args.hasArg(OPT_import_table);
ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
ctx.arg.cooperativeMultithreading =
- args.hasArg(OPT_cooperative_multithreading);;
+ args.hasArg(OPT_cooperative_multithreading);
+ ;
ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
if (ctx.arg.ltoo > 3)
error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
@@ -755,10 +757,10 @@ static void setConfigs() {
if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) {
ctx.arg.memoryExport = memoryName;
}
-
if (ctx.arg.cooperativeMultithreading) {
if (ctx.arg.sharedMemory)
- error("--cooperative-multithreading is incompatible with --shared-memory");
+ error(
+ "--cooperative-multithreading is incompatible with --shared-memory");
ctx.arg.libcallThreadContext = true;
}
}
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 753a1c7fe5c82..050f61c7f5c56 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -265,8 +265,7 @@ void ImportSection::writeBody() {
import.Kind = WASM_EXTERNAL_MEMORY;
import.Memory.Flags = 0;
import.Memory.Minimum = out.memorySec->numMemoryPages;
- if (out.memorySec->maxMemoryPages != 0 ||
- ctx.arg.sharedMemory) {
+ if (out.memorySec->maxMemoryPages != 0 || ctx.arg.sharedMemory) {
import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
import.Memory.Maximum = out.memorySec->maxMemoryPages;
}
@@ -407,8 +406,7 @@ void TableSection::assignIndexes() {
void MemorySection::writeBody() {
raw_ostream &os = bodyOutputStream;
- bool hasMax =
- maxMemoryPages != 0 || ctx.arg.sharedMemory;
+ bool hasMax = maxMemoryPages != 0 || ctx.arg.sharedMemory;
writeUleb128(os, 1, "memory count");
unsigned flags = 0;
if (hasMax)
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 48145d005c117..d90ca859f3479 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -431,8 +431,7 @@ void Writer::layoutMemory() {
}
// Make space for the memory initialization flag
- if (ctx.arg.sharedMemory &&
- hasPassiveInitializedSegments()) {
+ if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) {
memoryPtr = alignTo(memoryPtr, 4);
ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol(
"__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN);
@@ -1063,9 +1062,9 @@ OutputSegment *Writer::createOutputSegment(StringRef name) {
// threads. In the non-shared memory case, we use passive segments only for
// TLS segments, so that they can be reused, and for .bss segments, which
// don't need to be included in the binary at all.
- bool needsPassiveInit = ctx.arg.sharedMemory ||
- (ctx.arg.cooperativeMultithreading &&
- (s->isTLS() || s->name.starts_with(".bss")));
+ bool needsPassiveInit =
+ ctx.arg.sharedMemory || (ctx.arg.cooperativeMultithreading &&
+ (s->isTLS() || s->name.starts_with(".bss")));
if (needsPassiveInit)
s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
if (!ctx.arg.relocatable && name.starts_with(".bss"))
>From 95db2cb7aa88390b3e92971bc50c0cde6881c319 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Fri, 5 Jun 2026 09:19:17 +0100
Subject: [PATCH 10/15] Fix tests
---
lld/test/wasm/stack-pointer-abi.s | 2 +-
lld/test/wasm/tls-libcall.s | 2 +-
lld/wasm/Writer.cpp | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s
index fbae0475bcba2..c8c6370dbc7ff 100644
--- a/lld/test/wasm/stack-pointer-abi.s
+++ b/lld/test/wasm/stack-pointer-abi.s
@@ -1,5 +1,5 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --cooperative-threading --no-gc-sections -o %t.libcall.wasm %t.o
+# RUN: wasm-ld --cooperative-multithreading --no-gc-sections -o %t.libcall.wasm %t.o
# RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL
# RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o
# RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL
diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s
index a0a7f37379bac..b8d8935dbf766 100644
--- a/lld/test/wasm/tls-libcall.s
+++ b/lld/test/wasm/tls-libcall.s
@@ -1,5 +1,5 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --cooperative-threading --shared-memory -no-gc-sections -o %t.wasm %t.o
+# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o
# RUN: obj2yaml %t.wasm | FileCheck %s
# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index a03e5ff34e9e2..9f68432e1dc33 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -650,7 +650,7 @@ void Writer::populateTargetFeatures() {
sym->importModule && sym->importModule == "env";
}))
error(fileName + ": object file uses globals for thread context, "
- "but --cooperative-threading was specified");
+ "but --cooperative-multithreading was specified");
}
if (inferFeatures)
>From 617d774fe49e4a531c3f52483f1819d18d631d77 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Fri, 5 Jun 2026 11:25:55 +0100
Subject: [PATCH 11/15] Update comment
---
lld/wasm/Driver.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index b2723220b5afb..347f78b342513 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -1027,7 +1027,7 @@ static void createOptionalSymbols() {
if (ctx.sym.firstPageEnd)
ctx.sym.firstPageEnd->setVA(ctx.arg.pageSize);
- // For non-shared memory programs we still need to define __tls_base since we
+ // For non-multithreaded programs we still need to define __tls_base since we
// allow object files built with TLS to be linked into single threaded
// programs, and such object files can contain references to this symbol.
//
>From dd5aace9abcc3fcc4717c54450f26287397676e4 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Fri, 5 Jun 2026 11:29:32 +0100
Subject: [PATCH 12/15] Update test
---
llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll
index 46ac1cd0509b7..0cefa1b6b1f21 100644
--- a/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll
+++ b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll
@@ -1,6 +1,6 @@
; Test that in cooperative threading mode (wasm32-wasip3), thread-local variables
; are NOT stripped even when atomics are absent. In non-cooperative mode
-; (wasm32-unknown-unknown) TLS is stripped to .bss when atomics are absent.
+; (wasm32-unknown-unknown) TLS is treated as normal data when atomics are absent.
; RUN: llc < %s -mtriple=wasm32-wasip3 -mcpu=mvp -mattr=-atomics,+bulk-memory \
; RUN: | FileCheck %s --check-prefixes=COOP
@@ -10,11 +10,16 @@
target triple = "wasm32-unknown-unknown"
@foo = internal thread_local global i32 0
+@bar = internal thread_local global i32 1
; Cooperative threading: TLS is preserved — the section stays .tbss.
; COOP: .tbss.foo
+; COOP: .tdata.bar
; COOP-NOT: .bss.foo
+; COOP-NOT: .data.bar
; Non-cooperative: TLS stripped
; PLAIN: .bss.foo
+; PLAIN: .data.bar
; PLAIN-NOT: .tbss.foo
+; PLAIN-NOT: .tdata.bar
>From 8258f165ba7c51dacc5d1c4bab9a0d435997c9e5 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 8 Jun 2026 09:35:32 +0100
Subject: [PATCH 13/15] Cooperative multithreading -> cooperative threading
---
clang/lib/Driver/ToolChains/WebAssembly.cpp | 2 +-
clang/test/Driver/wasm-toolchain.c | 5 ++---
...rative-multithreading.s => cooperative-threading.s} | 10 +++++-----
lld/test/wasm/stack-pointer-abi.s | 2 +-
lld/test/wasm/thread-context-abi-mismatch.s | 4 ++--
lld/test/wasm/tls-libcall.s | 2 +-
lld/wasm/Config.h | 4 ++--
lld/wasm/Driver.cpp | 8 ++++----
lld/wasm/Options.td | 2 +-
lld/wasm/Writer.cpp | 4 ++--
10 files changed, 21 insertions(+), 22 deletions(-)
rename lld/test/wasm/{cooperative-multithreading.s => cooperative-threading.s} (82%)
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index 9add4e157dc53..be418e7db1724 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -180,7 +180,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args))
- CmdArgs.push_back("--cooperative-multithreading");
+ CmdArgs.push_back("--cooperative-threading");
if (WantsSharedMemory(ToolChain.getTriple(), Args))
CmdArgs.push_back("--shared-memory");
diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c
index 40d75da3166d9..c02a102fab081 100644
--- a/clang/test/Driver/wasm-toolchain.c
+++ b/clang/test/Driver/wasm-toolchain.c
@@ -304,9 +304,8 @@
// LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
// LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out"
-// Test that `wasm32-wasip3` passes `--cooperative-multithreading` to the linker.
+// Test that `wasm32-wasip3` passes `--cooperative-threading` to the linker.
// RUN: %clang -### --target=wasm32-wasip3 -fuse-ld=lld %s --sysroot /foo 2>&1 \
// RUN: | FileCheck -check-prefix=LINK_WASIP3_COOP %s
-// LINK_WASIP3_COOP: wasm-ld{{.*}}" {{.*}} "--cooperative-multithreading"
-// LINK_WASIP3_COOP-NOT: "--libcall-thread-context"
+// LINK_WASIP3_COOP: wasm-ld{{.*}}" {{.*}} "--cooperative-threading"
diff --git a/lld/test/wasm/cooperative-multithreading.s b/lld/test/wasm/cooperative-threading.s
similarity index 82%
rename from lld/test/wasm/cooperative-multithreading.s
rename to lld/test/wasm/cooperative-threading.s
index 5adfaa99e40a0..89f3ebc82864c 100644
--- a/lld/test/wasm/cooperative-multithreading.s
+++ b/lld/test/wasm/cooperative-threading.s
@@ -1,15 +1,15 @@
-# Test that --cooperative-multithreading uses the libcall ABI naming for
+# Test that --cooperative-threading uses the libcall ABI naming for
# thread-context globals (__init_stack_pointer, __init_tls_base, etc.) and
# works without --shared-memory and atomics.
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o
+# RUN: wasm-ld --cooperative-threading -no-gc-sections -o %t.wasm %t.o
# RUN: obj2yaml %t.wasm | FileCheck %s
# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
-# Test that --cooperative-multithreading and --shared-memory are mutually exclusive.
-# RUN: not wasm-ld --cooperative-multithreading --shared-memory %t.o -o %t2.wasm 2>&1 | FileCheck %s --check-prefix=INCOMPAT
-# INCOMPAT: --cooperative-multithreading is incompatible with --shared-memory
+# Test that --cooperative-threading and --shared-memory are mutually exclusive.
+# RUN: not wasm-ld --cooperative-threading --shared-memory %t.o -o %t2.wasm 2>&1 | FileCheck %s --check-prefix=INCOMPAT
+# INCOMPAT: --cooperative-threading is incompatible with --shared-memory
.globl __wasm_get_tls_base
__wasm_get_tls_base:
diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s
index c8c6370dbc7ff..fbae0475bcba2 100644
--- a/lld/test/wasm/stack-pointer-abi.s
+++ b/lld/test/wasm/stack-pointer-abi.s
@@ -1,5 +1,5 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --cooperative-multithreading --no-gc-sections -o %t.libcall.wasm %t.o
+# RUN: wasm-ld --cooperative-threading --no-gc-sections -o %t.libcall.wasm %t.o
# RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL
# RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o
# RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL
diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s
index a817ca4407aab..3debc1de662a1 100644
--- a/lld/test/wasm/thread-context-abi-mismatch.s
+++ b/lld/test/wasm/thread-context-abi-mismatch.s
@@ -3,9 +3,9 @@
# as an indication that the global thread context ABI is being used.
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: not wasm-ld --cooperative-multithreading %t.o -o %t.wasm 2>&1 | FileCheck %s
+# RUN: not wasm-ld --cooperative-threading %t.o -o %t.wasm 2>&1 | FileCheck %s
-# CHECK: object file uses globals for thread context, but --cooperative-multithreading was specified
+# CHECK: object file uses globals for thread context, but --cooperative-threading was specified
.globl _start
_start:
.functype _start () -> ()
diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s
index b8d8935dbf766..d8fb1c5e8a9ca 100644
--- a/lld/test/wasm/tls-libcall.s
+++ b/lld/test/wasm/tls-libcall.s
@@ -1,5 +1,5 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o
+# RUN: wasm-ld --cooperative-threading -no-gc-sections -o %t.wasm %t.o
# RUN: obj2yaml %t.wasm | FileCheck %s
# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 70c32d60831ee..517789b2d3494 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -65,7 +65,7 @@ struct Config {
bool growableTable;
bool gcSections;
llvm::StringSet<> keepSections;
- bool cooperativeMultithreading;
+ bool cooperativeThreading;
bool libcallThreadContext;
std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
std::optional<llvm::StringRef> memoryExport;
@@ -137,7 +137,7 @@ struct Config {
llvm::SmallVector<uint8_t, 0> buildIdVector;
bool isMultithreaded() const {
- return sharedMemory || cooperativeMultithreading;
+ return sharedMemory || cooperativeThreading;
}
};
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 347f78b342513..90c60da814114 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -561,8 +561,8 @@ static void readConfigs(opt::InputArgList &args) {
ctx.arg.soName = args.getLastArgValue(OPT_soname);
ctx.arg.importTable = args.hasArg(OPT_import_table);
ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
- ctx.arg.cooperativeMultithreading =
- args.hasArg(OPT_cooperative_multithreading);
+ ctx.arg.cooperativeThreading =
+ args.hasArg(OPT_cooperative_threading);
;
ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
if (ctx.arg.ltoo > 3)
@@ -757,10 +757,10 @@ static void setConfigs() {
if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) {
ctx.arg.memoryExport = memoryName;
}
- if (ctx.arg.cooperativeMultithreading) {
+ if (ctx.arg.cooperativeThreading) {
if (ctx.arg.sharedMemory)
error(
- "--cooperative-multithreading is incompatible with --shared-memory");
+ "--cooperative-threading is incompatible with --shared-memory");
ctx.arg.libcallThreadContext = true;
}
}
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index 6d18a0400ef97..bd46794e067b3 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -238,7 +238,7 @@ def page_size: JJ<"page-size=">,
def initial_memory: JJ<"initial-memory=">,
HelpText<"Initial size of the linear memory">;
-def cooperative_multithreading: FF<"cooperative-multithreading">,
+def cooperative_threading: FF<"cooperative-threading">,
HelpText<"Enable cooperative multithreading.">;
def max_memory: JJ<"max-memory=">,
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 9f68432e1dc33..42c0a48d0defb 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -650,7 +650,7 @@ void Writer::populateTargetFeatures() {
sym->importModule && sym->importModule == "env";
}))
error(fileName + ": object file uses globals for thread context, "
- "but --cooperative-multithreading was specified");
+ "but --cooperative-threading was specified");
}
if (inferFeatures)
@@ -1060,7 +1060,7 @@ OutputSegment *Writer::createOutputSegment(StringRef name) {
// TLS segments, so that they can be reused, and for .bss segments, which
// don't need to be included in the binary at all.
bool needsPassiveInit =
- ctx.arg.sharedMemory || (ctx.arg.cooperativeMultithreading &&
+ ctx.arg.sharedMemory || (ctx.arg.cooperativeThreading &&
(s->isTLS() || s->name.starts_with(".bss")));
if (needsPassiveInit)
s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
>From e1218c4dcf262270495b036aff82ce1f372d2a1f Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 8 Jun 2026 09:39:07 +0100
Subject: [PATCH 14/15] Simplify comment
---
lld/wasm/Driver.cpp | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 90c60da814114..dd2fcdd871940 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -1027,15 +1027,11 @@ static void createOptionalSymbols() {
if (ctx.sym.firstPageEnd)
ctx.sym.firstPageEnd->setVA(ctx.arg.pageSize);
- // For non-multithreaded programs we still need to define __tls_base since we
- // allow object files built with TLS to be linked into single threaded
- // programs, and such object files can contain references to this symbol.
- //
- // However, in this case __tls_base is immutable and points directly to the
- // start of the `.tdata` static segment.
- //
- // __tls_size and __tls_align are not needed in this case since they are only
- // needed for __wasm_init_tls (which we do not create in this case).
+ // TLS object files may be linked into single-threaded programs, so
+ // __tls_base must always be defined. In such programs it is immutable and
+ // points directly to the start of the `.tdata` segment. __tls_size and
+ // __tls_align are omitted since they are only used by __wasm_init_tls, which
+ // is not created for single-threaded programs.
if (!ctx.sym.tlsBase)
ctx.sym.tlsBase = createOptionalGlobal("__tls_base", false);
}
>From bec4064356994e163c59cb91f8a5d15994d1a608 Mon Sep 17 00:00:00 2001
From: Sy Brand <sy.brand at fastly.com>
Date: Mon, 8 Jun 2026 09:40:43 +0100
Subject: [PATCH 15/15] Fix test
---
lld/test/wasm/cooperative-threading.s | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lld/test/wasm/cooperative-threading.s b/lld/test/wasm/cooperative-threading.s
index 89f3ebc82864c..39df73858b6da 100644
--- a/lld/test/wasm/cooperative-threading.s
+++ b/lld/test/wasm/cooperative-threading.s
@@ -56,7 +56,7 @@ tls2:
# CHECK: - Type: MEMORY
# CHECK-NEXT: Memories:
# CHECK-NEXT: - Minimum: 0x2
-# CHECK-NOT: Shared: false
+# CHECK-NOT: Shared
# Globals should use the libcall ABI naming, not the global ABI.
# CHECK: GlobalNames:
More information about the cfe-commits
mailing list