https://github.com/TartanLlama updated 
https://github.com/llvm/llvm-project/pull/200855

>From 83c5f9ab46715c25d95e0bfe6b6d5b0e1e2dd5b0 Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Sun, 31 May 2026 17:23:22 +0100
Subject: [PATCH 01/11] Cooperative multithreading changes

---
 clang/lib/Driver/ToolChains/WebAssembly.cpp   |  8 +-
 clang/test/Driver/wasm-toolchain.c            |  7 ++
 lld/test/wasm/cooperative-multithreading.s    | 81 +++++++++++++++++++
 lld/test/wasm/thread-context-abi-mismatch.s   |  4 +-
 lld/wasm/Config.h                             |  6 ++
 lld/wasm/Driver.cpp                           | 11 ++-
 lld/wasm/Options.td                           |  5 +-
 lld/wasm/Relocations.cpp                      |  2 +-
 lld/wasm/SyntheticSections.cpp                | 20 ++---
 lld/wasm/Writer.cpp                           | 45 +++++++----
 .../WebAssembly/WebAssemblySubtarget.cpp      |  7 +-
 .../Target/WebAssembly/WebAssemblySubtarget.h |  4 +
 .../WebAssembly/WebAssemblyTargetMachine.cpp  | 11 ++-
 .../WebAssembly/cooperative-strip-tls.ll      | 20 +++++
 .../WebAssembly/target-features-tls.ll        |  1 +
 15 files changed, 193 insertions(+), 39 deletions(-)
 create mode 100644 lld/test/wasm/cooperative-multithreading.s
 create mode 100644 llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll

diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp 
b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index 4c1cd937e81aa..ce5463b167a58 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -88,8 +88,8 @@ static bool WantsPthread(const llvm::Triple &Triple, const 
ArgList &Args) {
   return WantsPthread;
 }
 
-static bool WantsLibcallThreadContext(const llvm::Triple &Triple,
-                                      const ArgList &Args) {
+static bool WantsCooperativeMultithreading(const llvm::Triple &Triple,
+                                           const ArgList &Args) {
   return Triple.getOS() == llvm::Triple::WASIp3;
 }
 
@@ -174,8 +174,8 @@ void wasm::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 
   AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
 
-  if (WantsLibcallThreadContext(ToolChain.getTriple(), Args))
-    CmdArgs.push_back("--libcall-thread-context");
+  if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args))
+    CmdArgs.push_back("--cooperative-multithreading");
 
   if (WantsPthread(ToolChain.getTriple(), Args))
     CmdArgs.push_back("--shared-memory");
diff --git a/clang/test/Driver/wasm-toolchain.c 
b/clang/test/Driver/wasm-toolchain.c
index 29a94aeec77a9..40d75da3166d9 100644
--- a/clang/test/Driver/wasm-toolchain.c
+++ b/clang/test/Driver/wasm-toolchain.c
@@ -303,3 +303,10 @@
 // RUN:   | FileCheck -check-prefix=LINK_WALI_BASIC %s
 // LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" 
"[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out"
+
+// Test that `wasm32-wasip3` passes `--cooperative-multithreading` to the 
linker.
+
+// RUN: %clang -### --target=wasm32-wasip3 -fuse-ld=lld %s --sysroot /foo 2>&1 
\
+// RUN:   | FileCheck -check-prefix=LINK_WASIP3_COOP %s
+// LINK_WASIP3_COOP: wasm-ld{{.*}}" {{.*}} "--cooperative-multithreading"
+// LINK_WASIP3_COOP-NOT: "--libcall-thread-context"
diff --git a/lld/test/wasm/cooperative-multithreading.s 
b/lld/test/wasm/cooperative-multithreading.s
new file mode 100644
index 0000000000000..cb41dd392d5e2
--- /dev/null
+++ b/lld/test/wasm/cooperative-multithreading.s
@@ -0,0 +1,81 @@
+# Test that --cooperative-multithreading uses the libcall ABI naming for
+# thread-context globals (__init_stack_pointer, __init_tls_base, etc.) and
+# works without --shared-memory and atomics.
+
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
+# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o
+# RUN: obj2yaml %t.wasm | FileCheck %s
+# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | 
FileCheck %s --check-prefix=DIS
+
+.globl         __wasm_get_tls_base
+__wasm_get_tls_base:
+    .functype   __wasm_get_tls_base () -> (i32)
+    i32.const 0
+    end_function
+
+.globl _start
+_start:
+  .functype _start () -> (i32)
+  call __wasm_get_tls_base
+  i32.const tls1@TLSREL
+  i32.add
+  i32.load 0
+  call __wasm_get_tls_base
+  i32.const tls2@TLSREL
+  i32.add
+  i32.load 0
+  i32.add
+  end_function
+
+.section  .tdata.tls1,"",@
+.globl  tls1
+tls1:
+  .int32  1
+  .size tls1, 4
+
+.section  .tdata.tls2,"",@
+.globl  tls2
+tls2:
+  .int32  2
+  .size tls2, 4
+
+.section  .custom_section.target_features,"",@
+  .int8 2
+  .int8 43
+  .int8 11
+  .ascii  "bulk-memory"
+  .int8 43
+  .int8 7
+  .ascii  "atomics"
+
+# Memory must NOT be marked as shared.
+# CHECK:      - Type:            MEMORY
+# CHECK-NEXT:   Memories:
+# CHECK-NEXT:     - Minimum:         0x2
+# CHECK-NOT:       IS_SHARED
+
+# Globals should use the libcall ABI naming, not the global ABI.
+# CHECK:      GlobalNames:
+# CHECK-NEXT:      - Index:           0
+# CHECK-NEXT:        Name:            __init_stack_pointer
+# CHECK-NEXT:      - Index:           1
+# CHECK-NEXT:        Name:            __init_tls_base
+# CHECK-NEXT:      - Index:           2
+# CHECK-NEXT:        Name:            __tls_size
+# CHECK-NEXT:      - Index:           3
+# CHECK-NEXT:        Name:            __tls_align
+
+# DIS-LABEL: <__wasm_init_memory>:
+
+# DIS-LABEL: <_start>:
+# DIS-EMPTY:
+# DIS-NEXT:       call    {{[0-9]+}}
+# DIS-NEXT:       i32.const       0
+# DIS-NEXT:       i32.add
+# DIS-NEXT:       i32.load        0
+# DIS-NEXT:       call    {{[0-9]+}}
+# DIS-NEXT:       i32.const       4
+# DIS-NEXT:       i32.add
+# DIS-NEXT:       i32.load        0
+# DIS-NEXT:       i32.add
+# DIS-NEXT:       end
diff --git a/lld/test/wasm/thread-context-abi-mismatch.s 
b/lld/test/wasm/thread-context-abi-mismatch.s
index 069534cbe5762..acab6fd59d9b7 100644
--- a/lld/test/wasm/thread-context-abi-mismatch.s
+++ b/lld/test/wasm/thread-context-abi-mismatch.s
@@ -4,9 +4,9 @@
 
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
 # RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s
+# RUN: not wasm-ld --cooperative-multithreading %t.o -o %t.wasm 2>&1 | 
FileCheck %s
 
-# CHECK: object file uses globals for thread context, but 
--libcall-thread-context was specified
-
+# CHECK: object file uses globals for thread context, but 
--libcall-thread-context or --cooperative-multithreading was specified
 .globl _start
 _start:
   .functype _start () -> ()
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 71a378a412e9e..873d25d130424 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -46,6 +46,8 @@ enum class UnresolvedPolicy { ReportError, Warn, Ignore, 
ImportDynamic };
 // For --build-id.
 enum class BuildIdKind { None, Fast, Sha1, Hexstring, Uuid };
 
+enum class ThreadModel { Single, Cooperative, SharedMemory };
+
 // This struct contains the global configuration for the linker.
 // Most fields are direct mapping from the command line options
 // and such fields have the same name as the corresponding options.
@@ -65,6 +67,7 @@ struct Config {
   bool growableTable;
   bool gcSections;
   llvm::StringSet<> keepSections;
+  bool cooperativeMultithreading;
   bool libcallThreadContext;
   std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
   std::optional<llvm::StringRef> memoryExport;
@@ -134,6 +137,9 @@ struct Config {
   std::optional<std::vector<std::string>> features;
   std::optional<std::vector<std::string>> extraFeatures;
   llvm::SmallVector<uint8_t, 0> buildIdVector;
+
+  ThreadModel threadModel = ThreadModel::Single;
+  bool isMultithreaded() const { return threadModel != ThreadModel::Single; }
 };
 
 // The Ctx object hold all other (non-configuration) global state.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index fe1e2eec95037..20b398fc39a0c 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -561,6 +561,7 @@ static void readConfigs(opt::InputArgList &args) {
   ctx.arg.soName = args.getLastArgValue(OPT_soname);
   ctx.arg.importTable = args.hasArg(OPT_import_table);
   ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
+  ctx.arg.cooperativeMultithreading = 
args.hasArg(OPT_cooperative_multithreading);
   ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context);
   ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
   if (ctx.arg.ltoo > 3)
@@ -755,6 +756,12 @@ static void setConfigs() {
   if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) {
     ctx.arg.memoryExport = memoryName;
   }
+
+  if (ctx.arg.cooperativeMultithreading) {
+    ctx.arg.threadModel = ThreadModel::Cooperative;
+    ctx.arg.libcallThreadContext = true;
+  } else if (ctx.arg.sharedMemory)
+    ctx.arg.threadModel = ThreadModel::SharedMemory;
 }
 
 // Some command line options or some combinations of them are not allowed.
@@ -964,7 +971,7 @@ static void createSyntheticSymbols() {
         createGlobalVariable(stack_pointer_name, 
!ctx.arg.libcallThreadContext);
   }
 
-  if (ctx.arg.sharedMemory) {
+  if (ctx.arg.isMultithreaded()) {
     // TLS symbols are all hidden/dso-local
     auto tls_base_name =
         ctx.arg.libcallThreadContext ? "__init_tls_base" : "__tls_base";
@@ -1028,7 +1035,7 @@ static void createOptionalSymbols() {
   //
   // __tls_size and __tls_align are not needed in this case since they are only
   // needed for __wasm_init_tls (which we do not create in this case).
-  if (!ctx.arg.sharedMemory)
+  if (!ctx.sym.tlsBase)
     ctx.sym.tlsBase = createOptionalGlobal("__tls_base", false);
 }
 
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index 144eee33061e1..8ad386ca0ce39 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -238,9 +238,12 @@ def page_size: JJ<"page-size=">,
 def initial_memory: JJ<"initial-memory=">,
   HelpText<"Initial size of the linear memory">;
 
+def cooperative_multithreading: FF<"cooperative-multithreading">,
+  HelpText<"Enable cooperative multithreading.">;
+
 def libcall_thread_context: FF<"libcall-thread-context">,
   HelpText<"Use library calls for thread context access instead of globals.">;
-  
+
 def max_memory: JJ<"max-memory=">,
   HelpText<"Maximum size of the linear memory">;
 
diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp
index a1840abe88b3a..cb597fdeffcf3 100644
--- a/lld/wasm/Relocations.cpp
+++ b/lld/wasm/Relocations.cpp
@@ -125,7 +125,7 @@ void scanRelocations(InputChunk *chunk) {
       // In single-threaded builds TLS is lowered away and TLS data can be
       // merged with normal data and allowing TLS relocation in non-TLS
       // segments.
-      if (ctx.arg.sharedMemory) {
+      if (ctx.arg.isMultithreaded()) {
         if (!sym->isTLS()) {
           error(toString(file) + ": relocation " +
                 relocTypeToString(reloc.Type) +
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index d1a01c7ec3f9d..a465f2fb590b3 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -57,7 +57,7 @@ void writeGetTLSBase(const Ctx &ctx, raw_ostream &os) {
     writeU8(os, WASM_OPCODE_CALL, "call");
     writeUleb128(os, ctx.sym.getTLSBase->getFunctionIndex(), "function index");
   } else {
-    writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_SET");
+    writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
     writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base");
   }
 }
@@ -265,11 +265,11 @@ void ImportSection::writeBody() {
     import.Kind = WASM_EXTERNAL_MEMORY;
     import.Memory.Flags = 0;
     import.Memory.Minimum = out.memorySec->numMemoryPages;
-    if (out.memorySec->maxMemoryPages != 0 || ctx.arg.sharedMemory) {
+    if (out.memorySec->maxMemoryPages != 0 || ctx.arg.threadModel == 
ThreadModel::SharedMemory) {
       import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
       import.Memory.Maximum = out.memorySec->maxMemoryPages;
     }
-    if (ctx.arg.sharedMemory)
+    if (ctx.arg.threadModel == ThreadModel::SharedMemory)
       import.Memory.Flags |= WASM_LIMITS_FLAG_IS_SHARED;
     if (is64)
       import.Memory.Flags |= WASM_LIMITS_FLAG_IS_64;
@@ -406,12 +406,12 @@ void TableSection::assignIndexes() {
 void MemorySection::writeBody() {
   raw_ostream &os = bodyOutputStream;
 
-  bool hasMax = maxMemoryPages != 0 || ctx.arg.sharedMemory;
+  bool hasMax = maxMemoryPages != 0 || ctx.arg.threadModel == 
ThreadModel::SharedMemory;
   writeUleb128(os, 1, "memory count");
   unsigned flags = 0;
   if (hasMax)
     flags |= WASM_LIMITS_FLAG_HAS_MAX;
-  if (ctx.arg.sharedMemory)
+  if (ctx.arg.threadModel == ThreadModel::SharedMemory)
     flags |= WASM_LIMITS_FLAG_IS_SHARED;
   if (ctx.arg.is64.value_or(false))
     flags |= WASM_LIMITS_FLAG_IS_64;
@@ -532,7 +532,7 @@ void GlobalSection::writeBody() {
         mutable_ = true;
       // With multi-threading any TLS globals must be mutable since they get
       // set during `__wasm_apply_global_tls_relocs`
-      if (ctx.arg.sharedMemory && sym->isTLS())
+      if (ctx.arg.isMultithreaded() && sym->isTLS())
         mutable_ = true;
     }
     WasmGlobalType type{itype, mutable_};
@@ -569,10 +569,10 @@ void GlobalSection::writeBody() {
     } else {
       WasmInitExpr initExpr;
       if (auto *d = dyn_cast<DefinedData>(sym))
-        // In the sharedMemory case TLS globals are set during
-        // `__wasm_apply_global_tls_relocs`, but in the non-shared case
+        // In the multithreaded case, TLS globals are set during
+        // `__wasm_apply_global_tls_relocs`, but in the single-threaded case
         // we know the absolute value at link time.
-        initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.sharedMemory), 
is64);
+        initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), 
is64);
       else if (auto *f = dyn_cast<FunctionSymbol>(sym))
         initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64);
       else {
@@ -680,7 +680,7 @@ bool DataCountSection::isNeeded() const {
   // instructions are not yet supported in input files.  However, in the case
   // of shared memory, lld itself will generate these instructions as part of
   // `__wasm_init_memory`. See Writer::createInitMemoryFunction.
-  return numSegments && ctx.arg.sharedMemory;
+  return numSegments && ctx.arg.isMultithreaded();
 }
 
 void LinkingSection::writeBody() {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 688bb829e1c42..79e3c46410e8d 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -425,13 +425,13 @@ void Writer::layoutMemory() {
   // Even in the absense of any actual TLS data, this symbol can still be
   // referenced (for example by __builtin_thread_pointer, which should not
   // return NULL).
-  if (!ctx.arg.sharedMemory && ctx.sym.tlsBase) {
+  if (!ctx.arg.isMultithreaded() && ctx.sym.tlsBase) {
     auto *tlsBase = cast<DefinedGlobal>(ctx.sym.tlsBase);
     setGlobalPtr(tlsBase, fixedTLSBase);
   }
 
   // Make space for the memory initialization flag
-  if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) {
+  if (ctx.arg.threadModel == ThreadModel::SharedMemory && 
hasPassiveInitializedSegments()) {
     memoryPtr = alignTo(memoryPtr, 4);
     ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol(
         "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN);
@@ -519,7 +519,7 @@ void Writer::layoutMemory() {
 
   // If no maxMemory config was supplied but we are building with
   // shared memory, we need to pick a sensible upper limit.
-  if (ctx.arg.sharedMemory && maxMemory == 0) {
+  if (ctx.arg.threadModel == ThreadModel::SharedMemory && maxMemory == 0) {
     if (ctx.isPic)
       maxMemory = maxMemorySetting;
     else
@@ -1057,7 +1057,15 @@ static StringRef getOutputDataSegmentName(const 
InputChunk &seg) {
 OutputSegment *Writer::createOutputSegment(StringRef name) {
   LLVM_DEBUG(dbgs() << "new segment: " << name << "\n");
   OutputSegment *s = make<OutputSegment>(name);
-  if (ctx.arg.sharedMemory)
+  // In the shared memory case, all data segments must be passive since they
+  // will be initialized once by the main thread and then shared with other
+  // threads. In the cooperative case, we use passive segments only for TLS
+  // segments, so that they can be reused, and for .bss segments, which
+  // don't need to be included in the binary at all.
+  bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory ||
+                        (ctx.arg.threadModel == ThreadModel::Cooperative &&
+                         (s->isTLS() || s->name.starts_with(".bss")));
+  if (needsPassiveInit)
     s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
   if (!ctx.arg.relocatable && name.starts_with(".bss"))
     s->isBss = true;
@@ -1198,7 +1206,7 @@ void Writer::createSyntheticInitFunctions() {
     }
   }
 
-  if (ctx.arg.sharedMemory) {
+  if (ctx.arg.isMultithreaded()) {
     if (out.globalSec->needsTLSRelocations()) {
       ctx.sym.applyGlobalTLSRelocs = symtab->addSyntheticFunction(
           "__wasm_apply_global_tls_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN,
@@ -1247,7 +1255,7 @@ void Writer::createInitMemoryFunction() {
   assert(ctx.sym.initMemory);
   assert(hasPassiveInitializedSegments());
   uint64_t flagAddress;
-  if (ctx.arg.sharedMemory) {
+  if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
     assert(ctx.sym.initMemoryFlag);
     flagAddress = ctx.sym.initMemoryFlag->getVA();
   }
@@ -1315,7 +1323,7 @@ void Writer::createInitMemoryFunction() {
       }
     };
 
-    if (ctx.arg.sharedMemory) {
+    if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
       // With PIC code we cache the flag address in local 0
       if (ctx.isPic) {
         writeUleb128(os, 1, "num local decls");
@@ -1378,7 +1386,7 @@ void Writer::createInitMemoryFunction() {
         // When we initialize the TLS segment we also set the TLS base.
         // This allows the runtime to use this static copy of the TLS data
         // for the first/main thread.
-        if (ctx.arg.sharedMemory && s->isTLS()) {
+        if (ctx.arg.isMultithreaded() && s->isTLS()) {
           if (ctx.isPic) {
             // Cache the result of the addionion in local 0
             writeU8(os, WASM_OPCODE_LOCAL_TEE, "local.tee");
@@ -1410,7 +1418,7 @@ void Writer::createInitMemoryFunction() {
       }
     }
 
-    if (ctx.arg.sharedMemory) {
+    if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
       // Set flag to 2 to mark end of initialization
       writeGetFlagAddress();
       writeI32Const(os, 2, "flag value");
@@ -1449,7 +1457,7 @@ void Writer::createInitMemoryFunction() {
       if (needsPassiveInitialization(s) && !s->isBss) {
         // The TLS region should not be dropped since its is needed
         // during the initialization of each thread (__wasm_init_tls).
-        if (ctx.arg.sharedMemory && s->isTLS())
+        if (ctx.arg.isMultithreaded() && s->isTLS())
           continue;
         // data.drop instruction
         writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
@@ -1502,7 +1510,7 @@ void Writer::createApplyDataRelocationsFunction() {
     writeUleb128(os, 0, "num locals");
     bool generated = false;
     for (const OutputSegment *seg : segments)
-      if (!ctx.arg.sharedMemory || !seg->isTLS())
+      if (!ctx.arg.isMultithreaded() || !seg->isTLS())
         for (const InputChunk *inSeg : seg->inputSegments)
           generated |= inSeg->generateRelocationCode(os);
 
@@ -1656,10 +1664,17 @@ void Writer::createInitTLSFunction() {
 
     writeUleb128(os, 0, "num locals");
     if (tlsSeg) {
-      writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
-      writeUleb128(os, 0, "local index");
-
       writeSetTLSBase(ctx, os);
+      /*
+      // In cooperative threading mode the runtime is responsible for calling
+      // __wasm_set_tls_base separately; __wasm_init_tls only copies the TLS
+      // template data.
+      if (!ctx.arg.libcallThreadContext) {
+        writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
+        writeUleb128(os, 0, "local index");
+        writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set");
+        writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index");
+      }*/
 
       // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend
       // op.
@@ -1791,7 +1806,7 @@ void Writer::run() {
   // `__memory_base` import.  Unless we support the extended const expression 
we
   // can't do addition inside the constant expression, so we much combine the
   // segments into a single one that can live at `__memory_base`.
-  if (ctx.isPic && !ctx.arg.extendedConst && !ctx.arg.sharedMemory) {
+  if (ctx.isPic && !ctx.arg.extendedConst && ctx.arg.threadModel != 
ThreadModel::SharedMemory) {
     // In shared memory mode all data segments are passive and initialized
     // via __wasm_init_memory.
     log("-- combineOutputSegments");
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 6326b7d76db82..9dea29fb0205d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -40,9 +40,12 @@ 
WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU,
 
   ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
 
-  // WASIP3 implies using the libcall thread context.
-  if (TargetTriple.getOS() == Triple::WASIp3)
+  // WASIP3 uses cooperative multithreading, which implies using libcall
+  // thread context.
+  if (TargetTriple.getOS() == Triple::WASIp3) {
+    HasCooperativeMultithreading = true;
     HasLibcallThreadContext = true;
+  }
 
   FeatureBitset Bits = getFeatureBits();
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h 
b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 5c6f4cb5b36ff..f637ce59ebfce 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -52,6 +52,7 @@ class WebAssemblySubtarget final : public 
WebAssemblyGenSubtargetInfo {
   bool HasExtendedConst = false;
   bool HasFP16 = false;
   bool HasGC = false;
+  bool HasCooperativeMultithreading = false;
   bool HasLibcallThreadContext = false;
   bool HasMultiMemory = false;
   bool HasMultivalue = false;
@@ -117,6 +118,9 @@ class WebAssemblySubtarget final : public 
WebAssemblyGenSubtargetInfo {
   bool hasExtendedConst() const { return HasExtendedConst; }
   bool hasFP16() const { return HasFP16; }
   bool hasGC() const { return HasGC; }
+  bool hasCooperativeMultithreading() const {
+    return HasCooperativeMultithreading;
+  }
   bool hasLibcallThreadContext() const { return HasLibcallThreadContext; }
   bool hasMultiMemory() const { return HasMultiMemory; }
   bool hasMultivalue() const { return HasMultivalue; }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 1361dd99b7072..ee15c9093ff3b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -283,10 +283,17 @@ class CoalesceFeaturesAndStripAtomics final : public 
ModulePass {
     bool StrippedAtomics = false;
     bool StrippedTLS = false;
 
+    // In cooperative threading mode, thread locals are meaningful even without
+    // atomics.
+    bool CooperativeThreading =
+        WasmTM->getSubtargetImpl()->hasCooperativeMultithreading();
+
     if (!Features[WebAssembly::FeatureAtomics]) {
       StrippedAtomics = stripAtomics(M);
-      StrippedTLS = stripThreadLocals(M);
-    } else if (!Features[WebAssembly::FeatureBulkMemory]) {
+      if (!CooperativeThreading)
+        StrippedTLS = stripThreadLocals(M);
+    }
+    if (!Features[WebAssembly::FeatureBulkMemory]) {
       StrippedTLS |= stripThreadLocals(M);
     }
 
diff --git a/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll 
b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll
new file mode 100644
index 0000000000000..46ac1cd0509b7
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll
@@ -0,0 +1,20 @@
+; Test that in cooperative threading mode (wasm32-wasip3), thread-local 
variables
+; are NOT stripped even when atomics are absent.  In non-cooperative mode
+; (wasm32-unknown-unknown) TLS is stripped to .bss when atomics are absent.
+
+; RUN: llc < %s -mtriple=wasm32-wasip3 -mcpu=mvp -mattr=-atomics,+bulk-memory \
+; RUN:   | FileCheck %s --check-prefixes=COOP
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=mvp 
-mattr=-atomics,+bulk-memory \
+; RUN:   | FileCheck %s --check-prefixes=PLAIN
+
+target triple = "wasm32-unknown-unknown"
+
+@foo = internal thread_local global i32 0
+
+; Cooperative threading: TLS is preserved — the section stays .tbss.
+; COOP:     .tbss.foo
+; COOP-NOT: .bss.foo
+
+; Non-cooperative: TLS stripped
+; PLAIN:     .bss.foo
+; PLAIN-NOT: .tbss.foo
diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll 
b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
index 4abe01a73aeee..92333f3c7b9f1 100644
--- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -32,3 +32,4 @@ target triple = "wasm32-unknown-unknown"
 ; BULK-MEM-NEXT: .int8 15
 ; BULK-MEM-NEXT: .ascii "bulk-memory-opt"
 ; BULK-MEM-NEXT: .tbss.foo,"T",@
+

>From 8fea1e2a54c8055132798269ca8b619729ea9db2 Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Mon, 1 Jun 2026 16:17:23 +0100
Subject: [PATCH 02/11] Cleanup createInitTLSFunction

---
 lld/wasm/Writer.cpp | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 79e3c46410e8d..aa6b84c6f925f 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -1664,17 +1664,9 @@ void Writer::createInitTLSFunction() {
 
     writeUleb128(os, 0, "num locals");
     if (tlsSeg) {
+      writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
+      writeUleb128(os, 0, "local index");
       writeSetTLSBase(ctx, os);
-      /*
-      // In cooperative threading mode the runtime is responsible for calling
-      // __wasm_set_tls_base separately; __wasm_init_tls only copies the TLS
-      // template data.
-      if (!ctx.arg.libcallThreadContext) {
-        writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
-        writeUleb128(os, 0, "local index");
-        writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set");
-        writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index");
-      }*/
 
       // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend
       // op.

>From ca1b2937d96037de1007677ca739a09162cae96f Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Mon, 1 Jun 2026 16:19:54 +0100
Subject: [PATCH 03/11] Remove newline

---
 llvm/test/CodeGen/WebAssembly/target-features-tls.ll | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll 
b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
index 92333f3c7b9f1..4abe01a73aeee 100644
--- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -32,4 +32,3 @@ target triple = "wasm32-unknown-unknown"
 ; BULK-MEM-NEXT: .int8 15
 ; BULK-MEM-NEXT: .ascii "bulk-memory-opt"
 ; BULK-MEM-NEXT: .tbss.foo,"T",@
-

>From e76828df6b96738c4b49708ac7888a63faa17d3b Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Mon, 1 Jun 2026 16:35:47 +0100
Subject: [PATCH 04/11] Replace libcall-thread-context flag

---
 lld/test/wasm/stack-pointer-abi.s           | 2 +-
 lld/test/wasm/thread-context-abi-mismatch.s | 3 +--
 lld/test/wasm/tls-libcall.s                 | 2 +-
 lld/wasm/Driver.cpp                         | 1 -
 lld/wasm/Options.td                         | 3 ---
 lld/wasm/Writer.cpp                         | 2 +-
 6 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/lld/test/wasm/stack-pointer-abi.s 
b/lld/test/wasm/stack-pointer-abi.s
index 869f972710991..fbae0475bcba2 100644
--- a/lld/test/wasm/stack-pointer-abi.s
+++ b/lld/test/wasm/stack-pointer-abi.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --libcall-thread-context --no-gc-sections -o %t.libcall.wasm 
%t.o
+# RUN: wasm-ld --cooperative-multithreading --no-gc-sections -o %t.libcall.wasm %t.o
 # RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL
 # RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o
 # RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL
diff --git a/lld/test/wasm/thread-context-abi-mismatch.s 
b/lld/test/wasm/thread-context-abi-mismatch.s
index acab6fd59d9b7..a817ca4407aab 100644
--- a/lld/test/wasm/thread-context-abi-mismatch.s
+++ b/lld/test/wasm/thread-context-abi-mismatch.s
@@ -3,10 +3,9 @@
 # as an indication that the global thread context ABI is being used.
 
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s
 # RUN: not wasm-ld --cooperative-multithreading %t.o -o %t.wasm 2>&1 | 
FileCheck %s
 
-# CHECK: object file uses globals for thread context, but 
--libcall-thread-context or --cooperative-multithreading was specified
+# CHECK: object file uses globals for thread context, but 
--cooperative-multithreading was specified
 .globl _start
 _start:
   .functype _start () -> ()
diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s
index df8b8f8be0207..a0a7f37379bac 100644
--- a/lld/test/wasm/tls-libcall.s
+++ b/lld/test/wasm/tls-libcall.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --libcall-thread-context --shared-memory -no-gc-sections -o 
%t.wasm %t.o
+# RUN: wasm-ld --cooperative-multithreading --shared-memory -no-gc-sections -o 
%t.wasm %t.o
 # RUN: obj2yaml %t.wasm | FileCheck %s
 # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | 
FileCheck %s --check-prefix=DIS
 
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 20b398fc39a0c..1ef4f55becc50 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -562,7 +562,6 @@ static void readConfigs(opt::InputArgList &args) {
   ctx.arg.importTable = args.hasArg(OPT_import_table);
   ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
   ctx.arg.cooperativeMultithreading = 
args.hasArg(OPT_cooperative_multithreading);
-  ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context);
   ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
   if (ctx.arg.ltoo > 3)
     error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index 8ad386ca0ce39..bd2a7a19e0887 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -241,9 +241,6 @@ def initial_memory: JJ<"initial-memory=">,
 def cooperative_multithreading: FF<"cooperative-multithreading">,
   HelpText<"Enable cooperative multithreading.">;
 
-def libcall_thread_context: FF<"libcall-thread-context">,
-  HelpText<"Use library calls for thread context access instead of globals.">;
-
 def max_memory: JJ<"max-memory=">,
   HelpText<"Maximum size of the linear memory">;
 
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index aa6b84c6f925f..2128c1b213e5c 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -653,7 +653,7 @@ void Writer::populateTargetFeatures() {
                  sym->importModule && sym->importModule == "env";
         }))
       error(fileName + ": object file uses globals for thread context, "
-                       "but --libcall-thread-context was specified");
+                       "but --cooperative-threading was specified");
   }
 
   if (inferFeatures)

>From 77c1c717e48adbe1f5800b0268a586e658dd6b5c Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Mon, 1 Jun 2026 16:36:36 +0100
Subject: [PATCH 05/11] fmt

---
 lld/wasm/Driver.cpp            |  3 ++-
 lld/wasm/SyntheticSections.cpp |  9 ++++++---
 lld/wasm/Writer.cpp            | 10 ++++++----
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 1ef4f55becc50..17781995815f5 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -561,7 +561,8 @@ static void readConfigs(opt::InputArgList &args) {
   ctx.arg.soName = args.getLastArgValue(OPT_soname);
   ctx.arg.importTable = args.hasArg(OPT_import_table);
   ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
-  ctx.arg.cooperativeMultithreading = args.hasArg(OPT_cooperative_multithreading);
+  ctx.arg.cooperativeMultithreading =
+      args.hasArg(OPT_cooperative_multithreading);
   ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
   if (ctx.arg.ltoo > 3)
     error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index a465f2fb590b3..6c7d46787d661 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -265,7 +265,8 @@ void ImportSection::writeBody() {
     import.Kind = WASM_EXTERNAL_MEMORY;
     import.Memory.Flags = 0;
     import.Memory.Minimum = out.memorySec->numMemoryPages;
-    if (out.memorySec->maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory) {
+    if (out.memorySec->maxMemoryPages != 0 ||
+        ctx.arg.threadModel == ThreadModel::SharedMemory) {
       import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
       import.Memory.Maximum = out.memorySec->maxMemoryPages;
     }
@@ -406,7 +407,8 @@ void TableSection::assignIndexes() {
 void MemorySection::writeBody() {
   raw_ostream &os = bodyOutputStream;
 
-  bool hasMax = maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory;
+  bool hasMax =
+      maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory;
   writeUleb128(os, 1, "memory count");
   unsigned flags = 0;
   if (hasMax)
@@ -572,7 +574,8 @@ void GlobalSection::writeBody() {
         // In the multithreaded case, TLS globals are set during
         // `__wasm_apply_global_tls_relocs`, but in the single-threaded case
         // we know the absolute value at link time.
-        initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64);
+        initExpr =
+            intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64);
       else if (auto *f = dyn_cast<FunctionSymbol>(sym))
         initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64);
       else {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 2128c1b213e5c..cf99208456a11 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -431,7 +431,8 @@ void Writer::layoutMemory() {
   }
 
   // Make space for the memory initialization flag
-  if (ctx.arg.threadModel == ThreadModel::SharedMemory && hasPassiveInitializedSegments()) {
+  if (ctx.arg.threadModel == ThreadModel::SharedMemory &&
+      hasPassiveInitializedSegments()) {
     memoryPtr = alignTo(memoryPtr, 4);
     ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol(
         "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN);
@@ -1063,8 +1064,8 @@ OutputSegment *Writer::createOutputSegment(StringRef name) {
   // TLS segments, so that they can be reused, and for .bss segments, which
   // don't need to be included in the binary at all.
   bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory ||
-                        (ctx.arg.threadModel == ThreadModel::Cooperative &&
-                         (s->isTLS() || s->name.starts_with(".bss")));
+                          (ctx.arg.threadModel == ThreadModel::Cooperative &&
+                           (s->isTLS() || s->name.starts_with(".bss")));
   if (needsPassiveInit)
     s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
   if (!ctx.arg.relocatable && name.starts_with(".bss"))
@@ -1798,7 +1799,8 @@ void Writer::run() {
   // `__memory_base` import.  Unless we support the extended const expression we
   // can't do addition inside the constant expression, so we much combine the
   // segments into a single one that can live at `__memory_base`.
-  if (ctx.isPic && !ctx.arg.extendedConst && ctx.arg.threadModel != ThreadModel::SharedMemory) {
+  if (ctx.isPic && !ctx.arg.extendedConst &&
+      ctx.arg.threadModel != ThreadModel::SharedMemory) {
     // In shared memory mode all data segments are passive and initialized
     // via __wasm_init_memory.
     log("-- combineOutputSegments");

>From 763392d058e4298f0bee6e66f17adb703b552a66 Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Mon, 1 Jun 2026 16:43:01 +0100
Subject: [PATCH 06/11] Correct output segments

---
 lld/wasm/Writer.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index cf99208456a11..d60dfcdaf43a6 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -1125,7 +1125,7 @@ void Writer::combineOutputSegments() {
   // This restriction does not apply when the extended const extension is
   // available: https://github.com/WebAssembly/extended-const
   assert(!ctx.arg.extendedConst);
-  assert(ctx.isPic && !ctx.arg.sharedMemory);
+  assert(ctx.isPic && !ctx.arg.isMultithreaded());
   if (segments.size() <= 1)
     return;
   OutputSegment *combined = make<OutputSegment>(".data");
@@ -1799,10 +1799,9 @@ void Writer::run() {
   // `__memory_base` import.  Unless we support the extended const expression we
   // can't do addition inside the constant expression, so we much combine the
   // segments into a single one that can live at `__memory_base`.
-  if (ctx.isPic && !ctx.arg.extendedConst &&
-      ctx.arg.threadModel != ThreadModel::SharedMemory) {
-    // In shared memory mode all data segments are passive and initialized
-    // via __wasm_init_memory.
+  if (ctx.isPic && !ctx.arg.extendedConst && !ctx.arg.isMultithreaded()) {
+    // In multithreaded modes (shared or cooperative), data segments may be
+    // passive and must not be combined into a single active segment.
     log("-- combineOutputSegments");
     combineOutputSegments();
   }

>From c68d4de08b8ac66b38a6d24adad0c26040aaa134 Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Mon, 1 Jun 2026 16:53:02 +0100
Subject: [PATCH 07/11] Cleanup

---
 lld/wasm/Config.h              |  6 +-----
 lld/wasm/Driver.cpp            |  5 -----
 lld/wasm/SyntheticSections.cpp |  8 ++++----
 lld/wasm/Writer.cpp            | 14 +++++++-------
 4 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 873d25d130424..af74f0f40bbdf 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -46,8 +46,6 @@ enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic };
 // For --build-id.
 enum class BuildIdKind { None, Fast, Sha1, Hexstring, Uuid };
 
-enum class ThreadModel { Single, Cooperative, SharedMemory };
-
 // This struct contains the global configuration for the linker.
 // Most fields are direct mapping from the command line options
 // and such fields have the same name as the corresponding options.
@@ -68,7 +66,6 @@ struct Config {
   bool gcSections;
   llvm::StringSet<> keepSections;
   bool cooperativeMultithreading;
-  bool libcallThreadContext;
   std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
   std::optional<llvm::StringRef> memoryExport;
   bool sharedMemory;
@@ -138,8 +135,7 @@ struct Config {
   std::optional<std::vector<std::string>> extraFeatures;
   llvm::SmallVector<uint8_t, 0> buildIdVector;
 
-  ThreadModel threadModel = ThreadModel::Single;
-  bool isMultithreaded() const { return threadModel != ThreadModel::Single; }
+  bool isMultithreaded() const { return sharedMemory || cooperativeMultithreading; }
 };
 
 // The Ctx object hold all other (non-configuration) global state.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 17781995815f5..b06e0fbb55eec 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -757,11 +757,6 @@ static void setConfigs() {
     ctx.arg.memoryExport = memoryName;
   }
 
-  if (ctx.arg.cooperativeMultithreading) {
-    ctx.arg.threadModel = ThreadModel::Cooperative;
-    ctx.arg.libcallThreadContext = true;
-  } else if (ctx.arg.sharedMemory)
-    ctx.arg.threadModel = ThreadModel::SharedMemory;
 }
 
 // Some command line options or some combinations of them are not allowed.
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 6c7d46787d661..753a1c7fe5c82 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -266,11 +266,11 @@ void ImportSection::writeBody() {
     import.Memory.Flags = 0;
     import.Memory.Minimum = out.memorySec->numMemoryPages;
     if (out.memorySec->maxMemoryPages != 0 ||
-        ctx.arg.threadModel == ThreadModel::SharedMemory) {
+        ctx.arg.sharedMemory) {
       import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
       import.Memory.Maximum = out.memorySec->maxMemoryPages;
     }
-    if (ctx.arg.threadModel == ThreadModel::SharedMemory)
+    if (ctx.arg.sharedMemory)
       import.Memory.Flags |= WASM_LIMITS_FLAG_IS_SHARED;
     if (is64)
       import.Memory.Flags |= WASM_LIMITS_FLAG_IS_64;
@@ -408,12 +408,12 @@ void MemorySection::writeBody() {
   raw_ostream &os = bodyOutputStream;
 
   bool hasMax =
-      maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory;
+      maxMemoryPages != 0 || ctx.arg.sharedMemory;
   writeUleb128(os, 1, "memory count");
   unsigned flags = 0;
   if (hasMax)
     flags |= WASM_LIMITS_FLAG_HAS_MAX;
-  if (ctx.arg.threadModel == ThreadModel::SharedMemory)
+  if (ctx.arg.sharedMemory)
     flags |= WASM_LIMITS_FLAG_IS_SHARED;
   if (ctx.arg.is64.value_or(false))
     flags |= WASM_LIMITS_FLAG_IS_64;
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index d60dfcdaf43a6..48145d005c117 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -431,7 +431,7 @@ void Writer::layoutMemory() {
   }
 
   // Make space for the memory initialization flag
-  if (ctx.arg.threadModel == ThreadModel::SharedMemory &&
+  if (ctx.arg.sharedMemory &&
       hasPassiveInitializedSegments()) {
     memoryPtr = alignTo(memoryPtr, 4);
     ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol(
@@ -520,7 +520,7 @@ void Writer::layoutMemory() {
 
   // If no maxMemory config was supplied but we are building with
   // shared memory, we need to pick a sensible upper limit.
-  if (ctx.arg.threadModel == ThreadModel::SharedMemory && maxMemory == 0) {
+  if (ctx.arg.sharedMemory && maxMemory == 0) {
     if (ctx.isPic)
       maxMemory = maxMemorySetting;
     else
@@ -1063,8 +1063,8 @@ OutputSegment *Writer::createOutputSegment(StringRef name) {
   // threads. In the non-shared memory case, we use passive segments only for
   // TLS segments, so that they can be reused, and for .bss segments, which
   // don't need to be included in the binary at all.
-  bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory ||
-                          (ctx.arg.threadModel == ThreadModel::Cooperative &&
+  bool needsPassiveInit = ctx.arg.sharedMemory ||
+                          (ctx.arg.cooperativeMultithreading &&
                            (s->isTLS() || s->name.starts_with(".bss")));
   if (needsPassiveInit)
     s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
@@ -1256,7 +1256,7 @@ void Writer::createInitMemoryFunction() {
   assert(ctx.sym.initMemory);
   assert(hasPassiveInitializedSegments());
   uint64_t flagAddress;
-  if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
+  if (ctx.arg.sharedMemory) {
     assert(ctx.sym.initMemoryFlag);
     flagAddress = ctx.sym.initMemoryFlag->getVA();
   }
@@ -1324,7 +1324,7 @@ void Writer::createInitMemoryFunction() {
       }
     };
 
-    if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
+    if (ctx.arg.sharedMemory) {
       // With PIC code we cache the flag address in local 0
       if (ctx.isPic) {
         writeUleb128(os, 1, "num local decls");
@@ -1419,7 +1419,7 @@ void Writer::createInitMemoryFunction() {
       }
     }
 
-    if (ctx.arg.threadModel == ThreadModel::SharedMemory) {
+    if (ctx.arg.sharedMemory) {
       // Set flag to 2 to mark end of initialization
       writeGetFlagAddress();
       writeI32Const(os, 2, "flag value");

>From c1a15696ba1d5de74ca5c0404b4405cc2f148a4e Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Mon, 1 Jun 2026 17:07:29 +0100
Subject: [PATCH 08/11] Cleanup options

---
 clang/lib/Driver/ToolChains/WebAssembly.cpp | 6 +++++-
 lld/test/wasm/cooperative-multithreading.s  | 4 ++++
 lld/wasm/Config.h                           | 1 +
 lld/wasm/Driver.cpp                         | 8 ++++++--
 lld/wasm/Options.td                         | 2 +-
 5 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index ce5463b167a58..d1e1766a0dee3 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -93,6 +93,10 @@ static bool WantsCooperativeMultithreading(const llvm::Triple &Triple,
   return Triple.getOS() == llvm::Triple::WASIp3;
 }
 
+static bool WantsSharedMemory(const llvm::Triple &Triple, const ArgList &Args) {
+  return WantsPthread(Triple, Args) && !WantsCooperativeMultithreading(Triple, Args);
+}
+
 void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
@@ -177,7 +181,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args))
     CmdArgs.push_back("--cooperative-multithreading");
 
-  if (WantsPthread(ToolChain.getTriple(), Args))
+  if (WantsSharedMemory(ToolChain.getTriple(), Args))
     CmdArgs.push_back("--shared-memory");
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
diff --git a/lld/test/wasm/cooperative-multithreading.s b/lld/test/wasm/cooperative-multithreading.s
index cb41dd392d5e2..5adfaa99e40a0 100644
--- a/lld/test/wasm/cooperative-multithreading.s
+++ b/lld/test/wasm/cooperative-multithreading.s
@@ -7,6 +7,10 @@
 # RUN: obj2yaml %t.wasm | FileCheck %s
 # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
 
+# Test that --cooperative-multithreading and --shared-memory are mutually exclusive.
+# RUN: not wasm-ld --cooperative-multithreading --shared-memory %t.o -o %t2.wasm 2>&1 | FileCheck %s --check-prefix=INCOMPAT
+# INCOMPAT: --cooperative-multithreading is incompatible with --shared-memory
+
 .globl         __wasm_get_tls_base
 __wasm_get_tls_base:
     .functype   __wasm_get_tls_base () -> (i32)
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index af74f0f40bbdf..60b04ad5abc87 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -66,6 +66,7 @@ struct Config {
   bool gcSections;
   llvm::StringSet<> keepSections;
   bool cooperativeMultithreading;
+  bool libcallThreadContext;
   std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
   std::optional<llvm::StringRef> memoryExport;
   bool sharedMemory;
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index b06e0fbb55eec..605aa5dcebe94 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -556,13 +556,12 @@ static void readConfigs(opt::InputArgList &args) {
   } else if (args.hasArg(OPT_export_memory)) {
     ctx.arg.memoryExport = memoryName;
   }
-
   ctx.arg.sharedMemory = args.hasArg(OPT_shared_memory);
   ctx.arg.soName = args.getLastArgValue(OPT_soname);
   ctx.arg.importTable = args.hasArg(OPT_import_table);
   ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
   ctx.arg.cooperativeMultithreading =
-      args.hasArg(OPT_cooperative_multithreading);
+      args.hasArg(OPT_cooperative_multithreading);;
   ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
   if (ctx.arg.ltoo > 3)
     error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
@@ -757,6 +756,11 @@ static void setConfigs() {
     ctx.arg.memoryExport = memoryName;
   }
 
+  if (ctx.arg.cooperativeMultithreading) {
+    if (ctx.arg.sharedMemory)
+      error("--cooperative-multithreading is incompatible with --shared-memory");
+    ctx.arg.libcallThreadContext = true;
+  }
 }
 
 // Some command line options or some combinations of them are not allowed.
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index bd2a7a19e0887..6d18a0400ef97 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -240,7 +240,7 @@ def initial_memory: JJ<"initial-memory=">,
 
 def cooperative_multithreading: FF<"cooperative-multithreading">,
   HelpText<"Enable cooperative multithreading.">;
-
+  
 def max_memory: JJ<"max-memory=">,
   HelpText<"Maximum size of the linear memory">;
 

>From 843491367a07d034b255ba630e6b00e17c58bca5 Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Mon, 1 Jun 2026 17:10:42 +0100
Subject: [PATCH 09/11] fmt

---
 clang/lib/Driver/ToolChains/WebAssembly.cpp | 3 ++-
 lld/wasm/Config.h                           | 4 +++-
 lld/wasm/Driver.cpp                         | 8 +++++---
 lld/wasm/SyntheticSections.cpp              | 6 ++----
 lld/wasm/Writer.cpp                         | 9 ++++-----
 5 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index d1e1766a0dee3..d8f23175eb58b 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -94,7 +94,8 @@ static bool WantsCooperativeMultithreading(const llvm::Triple &Triple,
 }
 
 static bool WantsSharedMemory(const llvm::Triple &Triple, const ArgList &Args) {
-  return WantsPthread(Triple, Args) && !WantsCooperativeMultithreading(Triple, Args);
+  return WantsPthread(Triple, Args) &&
+         !WantsCooperativeMultithreading(Triple, Args);
 }
 
 void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 60b04ad5abc87..d4789b88203eb 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -136,7 +136,9 @@ struct Config {
   std::optional<std::vector<std::string>> extraFeatures;
   llvm::SmallVector<uint8_t, 0> buildIdVector;
 
-  bool isMultithreaded() const { return sharedMemory || cooperativeMultithreading; }
+  bool isMultithreaded() const {
+    return sharedMemory || cooperativeMultithreading;
+  }
 };
 
 // The Ctx object hold all other (non-configuration) global state.
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 605aa5dcebe94..b2723220b5afb 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -556,12 +556,14 @@ static void readConfigs(opt::InputArgList &args) {
   } else if (args.hasArg(OPT_export_memory)) {
     ctx.arg.memoryExport = memoryName;
   }
+
   ctx.arg.sharedMemory = args.hasArg(OPT_shared_memory);
   ctx.arg.soName = args.getLastArgValue(OPT_soname);
   ctx.arg.importTable = args.hasArg(OPT_import_table);
   ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
   ctx.arg.cooperativeMultithreading =
-      args.hasArg(OPT_cooperative_multithreading);;
+      args.hasArg(OPT_cooperative_multithreading);
+  ;
   ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
   if (ctx.arg.ltoo > 3)
     error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
@@ -755,10 +757,10 @@ static void setConfigs() {
   if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) {
     ctx.arg.memoryExport = memoryName;
   }
-
   if (ctx.arg.cooperativeMultithreading) {
     if (ctx.arg.sharedMemory)
-      error("--cooperative-multithreading is incompatible with --shared-memory");
+      error(
+          "--cooperative-multithreading is incompatible with --shared-memory");
     ctx.arg.libcallThreadContext = true;
   }
 }
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 753a1c7fe5c82..050f61c7f5c56 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -265,8 +265,7 @@ void ImportSection::writeBody() {
     import.Kind = WASM_EXTERNAL_MEMORY;
     import.Memory.Flags = 0;
     import.Memory.Minimum = out.memorySec->numMemoryPages;
-    if (out.memorySec->maxMemoryPages != 0 ||
-        ctx.arg.sharedMemory) {
+    if (out.memorySec->maxMemoryPages != 0 || ctx.arg.sharedMemory) {
       import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX;
       import.Memory.Maximum = out.memorySec->maxMemoryPages;
     }
@@ -407,8 +406,7 @@ void TableSection::assignIndexes() {
 void MemorySection::writeBody() {
   raw_ostream &os = bodyOutputStream;
 
-  bool hasMax =
-      maxMemoryPages != 0 || ctx.arg.sharedMemory;
+  bool hasMax = maxMemoryPages != 0 || ctx.arg.sharedMemory;
   writeUleb128(os, 1, "memory count");
   unsigned flags = 0;
   if (hasMax)
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 48145d005c117..d90ca859f3479 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -431,8 +431,7 @@ void Writer::layoutMemory() {
   }
 
   // Make space for the memory initialization flag
-  if (ctx.arg.sharedMemory &&
-      hasPassiveInitializedSegments()) {
+  if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) {
     memoryPtr = alignTo(memoryPtr, 4);
     ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol(
         "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN);
@@ -1063,9 +1062,9 @@ OutputSegment *Writer::createOutputSegment(StringRef name) {
   // threads. In the non-shared memory case, we use passive segments only for
   // TLS segments, so that they can be reused, and for .bss segments, which
   // don't need to be included in the binary at all.
-  bool needsPassiveInit = ctx.arg.sharedMemory ||
-                          (ctx.arg.cooperativeMultithreading &&
-                           (s->isTLS() || s->name.starts_with(".bss")));
+  bool needsPassiveInit =
+      ctx.arg.sharedMemory || (ctx.arg.cooperativeMultithreading &&
+                               (s->isTLS() || s->name.starts_with(".bss")));
   if (needsPassiveInit)
     s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
   if (!ctx.arg.relocatable && name.starts_with(".bss"))

>From 95db2cb7aa88390b3e92971bc50c0cde6881c319 Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Fri, 5 Jun 2026 09:19:17 +0100
Subject: [PATCH 10/11] Fix tests

---
 lld/test/wasm/stack-pointer-abi.s | 2 +-
 lld/test/wasm/tls-libcall.s       | 2 +-
 lld/wasm/Writer.cpp               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s
index fbae0475bcba2..c8c6370dbc7ff 100644
--- a/lld/test/wasm/stack-pointer-abi.s
+++ b/lld/test/wasm/stack-pointer-abi.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --cooperative-threading --no-gc-sections -o %t.libcall.wasm %t.o
+# RUN: wasm-ld --cooperative-multithreading --no-gc-sections -o %t.libcall.wasm %t.o
 # RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL
 # RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o
 # RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL
diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s
index a0a7f37379bac..b8d8935dbf766 100644
--- a/lld/test/wasm/tls-libcall.s
+++ b/lld/test/wasm/tls-libcall.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
-# RUN: wasm-ld --cooperative-threading --shared-memory -no-gc-sections -o %t.wasm %t.o
+# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o
 # RUN: obj2yaml %t.wasm | FileCheck %s
 # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
 
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index a03e5ff34e9e2..9f68432e1dc33 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -650,7 +650,7 @@ void Writer::populateTargetFeatures() {
                  sym->importModule && sym->importModule == "env";
         }))
       error(fileName + ": object file uses globals for thread context, "
-                       "but --cooperative-threading was specified");
+                       "but --cooperative-multithreading was specified");
   }
 
   if (inferFeatures)

>From 617d774fe49e4a531c3f52483f1819d18d631d77 Mon Sep 17 00:00:00 2001
From: Sy Brand <[email protected]>
Date: Fri, 5 Jun 2026 11:25:55 +0100
Subject: [PATCH 11/11] Update comment

---
 lld/wasm/Driver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index b2723220b5afb..347f78b342513 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -1027,7 +1027,7 @@ static void createOptionalSymbols() {
   if (ctx.sym.firstPageEnd)
     ctx.sym.firstPageEnd->setVA(ctx.arg.pageSize);
 
-  // For non-shared memory programs we still need to define __tls_base since we
+  // For non-multithreaded programs we still need to define __tls_base since we
   // allow object files built with TLS to be linked into single threaded
   // programs, and such object files can contain references to this symbol.
   //

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to