Author: Sy Brand
Date: 2026-05-26T22:46:13Z
New Revision: 577e9a7cc82faf3dae98dba4aea917a6ffaab293

URL: 
https://github.com/llvm/llvm-project/commit/577e9a7cc82faf3dae98dba4aea917a6ffaab293
DIFF: 
https://github.com/llvm/llvm-project/commit/577e9a7cc82faf3dae98dba4aea917a6ffaab293.diff

LOG: [WebAssembly] WASIP3 Library Call Thread Context Support (#175800)

The [WebAssembly Component
Model](https://component-model.bytecodealliance.org/) has added support
for [cooperative
multithreading](https://github.com/WebAssembly/component-model/pull/557).
This has been implemented in the [Wasmtime
engine](https://github.com/bytecodealliance/wasmtime/pull/11751) and is
part of the wider project of [WASI preview
3](https://wasi.dev/roadmap#upcoming-wasi-03-releases), which is
currently tracked
[here](https://github.com/orgs/bytecodealliance/projects/16).

These changes require updating the way that `__stack_pointer` and
`__tls_base` work, but only for the new `wasm32-wasip3` target; other
targets are not affected. Specifically, rather than using a Wasm global for
tracking the stack pointer and TLS base, the new
[`context.get/set`](https://github.com/WebAssembly/component-model/blob/main/design/mvp/CanonicalABI.md#-canon-contextget)
component model builtin functions will be used (the intention being that
runtimes will need to aggressively optimize these calls into single
loads/stores). For the justification of choosing this over switching out
the global at context-switch boundaries, see [this
comment](https://github.com/WebAssembly/wasi-libc/issues/691#issuecomment-3716405618)
and [this
comment](https://github.com/WebAssembly/wasi-libc/issues/691#issuecomment-3716916730).

This PR adds support for using library calls instead of globals for
holding the stack pointer and TLS base. When used, this thread context
ABI emits calls to `__wasm_{get,set}_{stack_pointer,tls_base}` when
needed. These functions can then be implemented in `libc`. This is
enabled only for the WASIp3 target.

There is a temporary macro definition, `__wasm_libcall_thread_context__`,
which can be removed once `wasi-libc` has fully migrated to the new ABI
for the WASIp3 target.

Added: 
    lld/test/wasm/stack-pointer-abi.s
    lld/test/wasm/thread-context-abi-mismatch.s
    lld/test/wasm/tls-libcall.s
    llvm/test/CodeGen/WebAssembly/stack-abi.ll
    llvm/test/DebugInfo/WebAssembly/thread-context-abi.ll

Modified: 
    clang/lib/Basic/Targets/WebAssembly.cpp
    clang/lib/Basic/Targets/WebAssembly.h
    clang/lib/Driver/ToolChains/WebAssembly.cpp
    clang/test/Preprocessor/wasm-target-features.c
    lld/wasm/Config.h
    lld/wasm/Driver.cpp
    lld/wasm/Options.td
    lld/wasm/SyntheticSections.cpp
    lld/wasm/Writer.cpp
    llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
    llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
    llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
    llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
    llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
    llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
    llvm/test/CodeGen/WebAssembly/thread_pointer.ll
    llvm/test/CodeGen/WebAssembly/tls-local-exec.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/Basic/Targets/WebAssembly.cpp 
b/clang/lib/Basic/Targets/WebAssembly.cpp
index ba3ff11ad4ea9..46f9bd10f01ec 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -123,6 +123,8 @@ void WebAssemblyTargetInfo::getTargetDefines(const 
LangOptions &Opts,
     Builder.defineMacro("__wasm_tail_call__");
   if (HasWideArithmetic)
     Builder.defineMacro("__wasm_wide_arithmetic__");
+  if (HasLibcallThreadContext)
+    Builder.defineMacro("__wasm_libcall_thread_context__");
   // Note that not all wasm features appear here.   For example,
   // HasCompatctImports
 

diff  --git a/clang/lib/Basic/Targets/WebAssembly.h 
b/clang/lib/Basic/Targets/WebAssembly.h
index 808342485cad0..6085197498163 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public 
TargetInfo {
   bool HasExtendedConst = false;
   bool HasFP16 = false;
   bool HasGC = false;
+  bool HasLibcallThreadContext = false;
   bool HasMultiMemory = false;
   bool HasMultivalue = false;
   bool HasMutableGlobals = false;
@@ -110,6 +111,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : 
public TargetInfo {
       PtrDiffType = SignedLong;
       IntPtrType = SignedLong;
     }
+    if (T.getOS() == llvm::Triple::WASIp3)
+      HasLibcallThreadContext = true;
   }
 
   StringRef getABI() const override;

diff  --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp 
b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index 56913778d453f..4c1cd937e81aa 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -88,6 +88,11 @@ static bool WantsPthread(const llvm::Triple &Triple, const 
ArgList &Args) {
   return WantsPthread;
 }
 
+static bool WantsLibcallThreadContext(const llvm::Triple &Triple,
+                                      const ArgList &Args) {
+  return Triple.getOS() == llvm::Triple::WASIp3;
+}
+
 void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
@@ -169,6 +174,9 @@ void wasm::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 
   AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
 
+  if (WantsLibcallThreadContext(ToolChain.getTriple(), Args))
+    CmdArgs.push_back("--libcall-thread-context");
+
   if (WantsPthread(ToolChain.getTriple(), Args))
     CmdArgs.push_back("--shared-memory");
 

diff  --git a/clang/test/Preprocessor/wasm-target-features.c 
b/clang/test/Preprocessor/wasm-target-features.c
index 3244fa61c0a4b..d5d0c241d86a3 100644
--- a/clang/test/Preprocessor/wasm-target-features.c
+++ b/clang/test/Preprocessor/wasm-target-features.c
@@ -217,6 +217,7 @@
 // GENERIC-NOT: #define __wasm_simd128__ 1{{$}}
 // GENERIC-NOT: #define __wasm_tail_call__ 1{{$}}
 // GENERIC-NOT: #define __wasm_wide_arithmetic__ 1{{$}}
+// GENERIC-NOT: #define __wasm_libcall_thread_context__ 1{{$}}
 
 // RUN: %clang -E -dM %s -o - 2>&1 \
 // RUN:     -target wasm32-unknown-unknown -mcpu=bleeding-edge \
@@ -251,3 +252,12 @@
 // RUN:   | FileCheck %s -check-prefix=BLEEDING-EDGE-NO-SIMD128
 //
 // BLEEDING-EDGE-NO-SIMD128-NOT: #define __wasm_simd128__ 1{{$}}
+
+// RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target wasm32-wasip3 \
+// RUN:   | FileCheck %s -check-prefix=LIBCALL-THREAD-CONTEXT
+// RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target wasm64-wasip3 \
+// RUN:   | FileCheck %s -check-prefix=LIBCALL-THREAD-CONTEXT
+
+// LIBCALL-THREAD-CONTEXT: #define __wasm_libcall_thread_context__ 1{{$}}

diff  --git a/lld/test/wasm/stack-pointer-abi.s 
b/lld/test/wasm/stack-pointer-abi.s
new file mode 100644
index 0000000000000..6bfe0d4a8daa8
--- /dev/null
+++ b/lld/test/wasm/stack-pointer-abi.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
+# RUN: wasm-ld --libcall-thread-context -o %t.libcall.wasm %t.o
+# RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL
+# RUN: wasm-ld -o %t.global.wasm %t.o
+# RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL
+
+  .globl  _start
+_start:
+  .functype _start () -> ()
+  end_function
+
+# LIBCALL: Name: __init_stack_pointer
+# GLOBAL: Name: __stack_pointer

diff  --git a/lld/test/wasm/thread-context-abi-mismatch.s 
b/lld/test/wasm/thread-context-abi-mismatch.s
new file mode 100644
index 0000000000000..069534cbe5762
--- /dev/null
+++ b/lld/test/wasm/thread-context-abi-mismatch.s
@@ -0,0 +1,22 @@
+# Test that linking object files with mismatched thread context ABIs fails with an error.
+# The presence of an import of __stack_pointer from the env module should be treated
+# as an indication that the global thread context ABI is being used.
+
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
+# RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s
+
+# CHECK: object file uses globals for thread context, but --libcall-thread-context was specified
+
+.globl _start
+_start:
+  .functype _start () -> ()
+  end_function
+
+.globaltype __stack_pointer, i32
+
+.globl use_stack_pointer
+use_stack_pointer:
+  .functype use_stack_pointer () -> ()
+  global.get __stack_pointer
+  drop
+  end_function

diff  --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s
new file mode 100644
index 0000000000000..df8b8f8be0207
--- /dev/null
+++ b/lld/test/wasm/tls-libcall.s
@@ -0,0 +1,71 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
+# RUN: wasm-ld --libcall-thread-context --shared-memory -no-gc-sections -o %t.wasm %t.o
+# RUN: obj2yaml %t.wasm | FileCheck %s
+# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS
+
+.globl         __wasm_get_tls_base
+__wasm_get_tls_base:
+    .functype   __wasm_get_tls_base () -> (i32)
+    i32.const 0
+    end_function
+
+.globl _start
+_start:
+  .functype _start () -> (i32)
+  call __wasm_get_tls_base
+  i32.const tls1@TLSREL
+  i32.add
+  i32.load 0
+  call __wasm_get_tls_base
+  i32.const tls2@TLSREL
+  i32.add
+  i32.load 0
+  i32.add
+  end_function
+
+.section  .tdata.tls1,"",@
+.globl  tls1
+tls1:
+  .int32  1
+  .size tls1, 4
+
+.section  .tdata.tls2,"",@
+.globl  tls2
+tls2:
+  .int32  2
+  .size tls2, 4
+
+.section  .custom_section.target_features,"",@
+  .int8 2
+  .int8 43
+  .int8 11
+  .ascii  "bulk-memory"
+  .int8 43
+  .int8 7
+  .ascii  "atomics"
+
+
+# CHECK:      GlobalNames:
+# CHECK-NEXT:      - Index:           0
+# CHECK-NEXT:        Name:            __init_stack_pointer
+# CHECK-NEXT:      - Index:           1
+# CHECK-NEXT:        Name:            __init_tls_base
+# CHECK-NEXT:      - Index:           2
+# CHECK-NEXT:        Name:            __tls_size
+# CHECK-NEXT:      - Index:           3
+# CHECK-NEXT:        Name:            __tls_align
+
+# DIS-LABEL: <__wasm_init_memory>:
+
+# DIS-LABEL: <_start>:
+# DIS-EMPTY:
+# DIS-NEXT:       call    4
+# DIS-NEXT:       i32.const       0
+# DIS-NEXT:       i32.add 
+# DIS-NEXT:       i32.load        0
+# DIS-NEXT:       call    4
+# DIS-NEXT:       i32.const       4
+# DIS-NEXT:       i32.add 
+# DIS-NEXT:       i32.load        0
+# DIS-NEXT:       i32.add 
+# DIS-NEXT:       end

diff  --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 491bf9233b0cf..71a378a412e9e 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -35,6 +35,7 @@ class Symbol;
 class DefinedData;
 class GlobalSymbol;
 class DefinedFunction;
+class UndefinedFunction;
 class DefinedGlobal;
 class UndefinedGlobal;
 class TableSymbol;
@@ -64,6 +65,7 @@ struct Config {
   bool growableTable;
   bool gcSections;
   llvm::StringSet<> keepSections;
+  bool libcallThreadContext;
   std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport;
   std::optional<llvm::StringRef> memoryExport;
   bool sharedMemory;
@@ -252,6 +254,14 @@ struct Ctx {
     // Used as an address space for function pointers, with each function that
     // is used as a function pointer being allocated a slot.
     TableSymbol *indirectFunctionTable;
+
+    // __wasm_set_tls_base
+    // Function used to set TLS base in libcall thread context modules.
+    UndefinedFunction *setTLSBase;
+
+    // __wasm_get_tls_base
+    // Function used to get TLS base in libcall thread context modules.
+    UndefinedFunction *getTLSBase;
   };
   WasmSym sym;
 

diff  --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 2cba6ae540526..f2eec055f65b2 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -561,6 +561,7 @@ static void readConfigs(opt::InputArgList &args) {
   ctx.arg.soName = args.getLastArgValue(OPT_soname);
   ctx.arg.importTable = args.hasArg(OPT_import_table);
   ctx.arg.importUndefined = args.hasArg(OPT_import_undefined);
+  ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context);
   ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
   if (ctx.arg.ltoo > 3)
     error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
@@ -883,6 +884,16 @@ createUndefinedGlobal(StringRef name, 
llvm::wasm::WasmGlobalType *type) {
   return sym;
 }
 
+static UndefinedFunction *createUndefinedFunction(StringRef name,
+                                                  WasmSignature *signature) {
+  auto *sym = cast<UndefinedFunction>(symtab->addUndefinedFunction(
+      name, std::nullopt, std::nullopt, WASM_SYMBOL_UNDEFINED, nullptr,
+      signature, true));
+  ctx.arg.allowUndefinedSymbols.insert(sym->getName());
+  sym->isUsedInRegularObj = true;
+  return sym;
+}
+
 static InputGlobal *createGlobal(StringRef name, bool isMutable) {
   llvm::wasm::WasmGlobal wasmGlobal;
   bool is64 = ctx.arg.is64.value_or(false);
@@ -917,17 +928,26 @@ static void createSyntheticSymbols() {
                                                             true};
   static llvm::wasm::WasmGlobalType mutableGlobalTypeI64 = {WASM_TYPE_I64,
                                                             true};
+
   ctx.sym.callCtors = symtab->addSyntheticFunction(
       "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN,
       make<SyntheticFunction>(nullSignature, "__wasm_call_ctors"));
 
   bool is64 = ctx.arg.is64.value_or(false);
 
+  auto stack_pointer_name =
+      ctx.arg.libcallThreadContext ? "__init_stack_pointer" : 
"__stack_pointer";
   if (ctx.isPic) {
-    ctx.sym.stackPointer =
-        createUndefinedGlobal("__stack_pointer", ctx.arg.is64.value_or(false)
-                                                     ? &mutableGlobalTypeI64
-                                                     : &mutableGlobalTypeI32);
+    if (ctx.arg.libcallThreadContext) {
+      ctx.sym.stackPointer = createUndefinedGlobal(
+          stack_pointer_name,
+          ctx.arg.is64.value_or(false) ? &globalTypeI64 : &globalTypeI32);
+    } else {
+      ctx.sym.stackPointer = createUndefinedGlobal(stack_pointer_name,
+                                                   ctx.arg.is64.value_or(false)
+                                                       ? &mutableGlobalTypeI64
+                                                       : 
&mutableGlobalTypeI32);
+    }
     // For PIC code, we import two global variables (__memory_base and
     // __table_base) from the environment and use these as the offset at
     // which to load our static data and function table.
@@ -940,14 +960,18 @@ static void createSyntheticSymbols() {
     ctx.sym.tableBase->markLive();
   } else {
     // For non-PIC code
-    ctx.sym.stackPointer = createGlobalVariable("__stack_pointer", true);
+    ctx.sym.stackPointer =
+        createGlobalVariable(stack_pointer_name, 
!ctx.arg.libcallThreadContext);
     ctx.sym.stackPointer->markLive();
   }
 
   if (ctx.arg.sharedMemory) {
     // TLS symbols are all hidden/dso-local
+    auto tls_base_name =
+        ctx.arg.libcallThreadContext ? "__init_tls_base" : "__tls_base";
     ctx.sym.tlsBase =
-        createGlobalVariable("__tls_base", true, 
WASM_SYMBOL_VISIBILITY_HIDDEN);
+        createGlobalVariable(tls_base_name, !ctx.arg.libcallThreadContext,
+                             WASM_SYMBOL_VISIBILITY_HIDDEN);
     ctx.sym.tlsSize = createGlobalVariable("__tls_size", false,
                                            WASM_SYMBOL_VISIBILITY_HIDDEN);
     ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false,
@@ -956,6 +980,17 @@ static void createSyntheticSymbols() {
         "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN,
         make<SyntheticFunction>(is64 ? i64ArgSignature : i32ArgSignature,
                                 "__wasm_init_tls"));
+    if (ctx.arg.libcallThreadContext) {
+      ctx.sym.tlsBase->markLive();
+      ctx.sym.tlsSize->markLive();
+      ctx.sym.tlsAlign->markLive();
+      static WasmSignature setTLSBaseSignature{{}, {ValType::I32}};
+      ctx.sym.setTLSBase =
+          createUndefinedFunction("__wasm_set_tls_base", &setTLSBaseSignature);
+      static WasmSignature getTLSBaseSignature{{ValType::I32}, {}};
+      ctx.sym.getTLSBase =
+          createUndefinedFunction("__wasm_get_tls_base", &getTLSBaseSignature);
+    }
   }
 }
 

diff  --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index a009cac7f57ad..144eee33061e1 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -238,6 +238,9 @@ def page_size: JJ<"page-size=">,
 def initial_memory: JJ<"initial-memory=">,
   HelpText<"Initial size of the linear memory">;
 
+def libcall_thread_context: FF<"libcall-thread-context">,
+  HelpText<"Use library calls for thread context access instead of globals.">;
+  
 def max_memory: JJ<"max-memory=">,
   HelpText<"Maximum size of the linear memory">;
 

diff  --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 9ba43ef0fae0b..d1a01c7ec3f9d 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -52,6 +52,16 @@ class SubSection {
   raw_string_ostream os{body};
 };
 
+void writeGetTLSBase(const Ctx &ctx, raw_ostream &os) {
+  if (ctx.arg.libcallThreadContext) {
+    writeU8(os, WASM_OPCODE_CALL, "call");
+    writeUleb128(os, ctx.sym.getTLSBase->getFunctionIndex(), "function index");
+  } else {
+    writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_SET");
+    writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base");
+  }
+}
+
 } // namespace
 
 bool DylinkSection::isNeeded() const {
@@ -474,11 +484,12 @@ void GlobalSection::generateRelocationCode(raw_ostream 
&os, bool TLS) const {
 
     if (auto *d = dyn_cast<DefinedData>(sym)) {
       // Get __memory_base
-      writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
       if (sym->isTLS())
-        writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base");
-      else
+        writeGetTLSBase(ctx, os);
+      else {
+        writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
         writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), 
"__memory_base");
+      }
 
       // Add the virtual address of the data symbol
       writePtrConst(os, d->getVA(), is64, "offset");
@@ -519,7 +530,7 @@ void GlobalSection::writeBody() {
       // the correct runtime value during `__wasm_apply_global_relocs`.
       if (!ctx.arg.extendedConst && ctx.isPic && !sym->isTLS())
         mutable_ = true;
-      // With multi-theadeding any TLS globals must be mutable since they get
+      // With multi-threading any TLS globals must be mutable since they get
       // set during `__wasm_apply_global_tls_relocs`
       if (ctx.arg.sharedMemory && sym->isTLS())
         mutable_ = true;

diff  --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 038a8b3f5417d..688bb829e1c42 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -123,6 +123,15 @@ class Writer {
   llvm::SmallDenseMap<StringRef, OutputSegment *> segmentMap;
 };
 
+void writeSetTLSBase(const Ctx &ctx, raw_ostream &os) {
+  if (ctx.arg.libcallThreadContext) {
+    writeU8(os, WASM_OPCODE_CALL, "call");
+    writeUleb128(os, ctx.sym.setTLSBase->getFunctionIndex(), "function index");
+  } else {
+    writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET");
+    writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base");
+  }
+}
 } // anonymous namespace
 
 void Writer::calculateCustomSections() {
@@ -635,6 +644,16 @@ void Writer::populateTargetFeatures() {
       return segment->live && segment->isTLS();
     };
     tlsUsed = tlsUsed || llvm::any_of(file->segments, isTLS);
+
+    // Ensure that we're not mixing incompatible thread context models
+    if (ctx.arg.libcallThreadContext &&
+        llvm::any_of(file->getSymbols(), [](const auto &sym) {
+          return sym && sym->getName() == "__stack_pointer" &&
+                 sym->kind() == Symbol::UndefinedGlobalKind &&
+                 sym->importModule && sym->importModule == "env";
+        }))
+      error(fileName + ": object file uses globals for thread context, "
+                       "but --libcall-thread-context was specified");
   }
 
   if (inferFeatures)
@@ -1356,9 +1375,9 @@ void Writer::createInitMemoryFunction() {
                   "i32.add");
         }
 
-        // When we initialize the TLS segment we also set the `__tls_base`
-        // global.  This allows the runtime to use this static copy of the
-        // TLS data for the first/main thread.
+        // When we initialize the TLS segment we also set the TLS base.
+        // This allows the runtime to use this static copy of the TLS data
+        // for the first/main thread.
         if (ctx.arg.sharedMemory && s->isTLS()) {
           if (ctx.isPic) {
             // Cache the result of the addionion in local 0
@@ -1367,8 +1386,7 @@ void Writer::createInitMemoryFunction() {
           } else {
             writePtrConst(os, s->startVA, is64, "destination address");
           }
-          writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET");
-          writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base");
+          writeSetTLSBase(ctx, os);
           if (ctx.isPic) {
             writeU8(os, WASM_OPCODE_LOCAL_GET, "local.tee");
             writeUleb128(os, 1, "local 1");
@@ -1641,8 +1659,7 @@ void Writer::createInitTLSFunction() {
       writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
       writeUleb128(os, 0, "local index");
 
-      writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set");
-      writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index");
+      writeSetTLSBase(ctx, os);
 
       // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend
       // op.

diff  --git 
a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h 
b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 58d6b44a94d4d..6f52281628e46 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -110,8 +110,9 @@ enum TOF {
   MO_MEMORY_BASE_REL,
 
   // On a symbol operand this indicates that the immediate is the symbol
-  // address relative the __tls_base wasm global.
-  // Only applicable to data symbols.
+  // address relative to the TLS base. This is retrieved through
+  // __wasm_get_tls_base() when using libcall thread context, and the __tls_base
+  // global otherwise. Only applicable to data symbols.
   MO_TLS_BASE_REL,
 
   // On a symbol operand this indicates that the immediate is the symbol

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index edea99e629407..c25972343c96a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -236,8 +236,7 @@ MCSymbol 
*WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
   if (Name == "__stack_pointer" || Name == "__tls_base" ||
       Name == "__memory_base" || Name == "__table_base" ||
       Name == "__tls_size" || Name == "__tls_align") {
-    bool Mutable =
-        Name == "__stack_pointer" || Name == "__tls_base";
+    bool Mutable = Name == "__stack_pointer" || Name == "__tls_base";
     WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
     WasmSym->setGlobalType(wasm::WasmGlobalType{
         uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64
@@ -265,6 +264,14 @@ MCSymbol 
*WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
     wasm::ValType AddrType =
         Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
     Params.push_back(AddrType);
+  } else if (Name == "__wasm_get_stack_pointer" ||
+             Name == "__wasm_get_tls_base") {
+    WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+    Returns.push_back(wasm::ValType::I32);
+  } else if (Name == "__wasm_set_stack_pointer" ||
+             Name == "__wasm_set_tls_base") {
+    WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+    Params.push_back(wasm::ValType::I32);
   } else { // Function symbols
     WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
     WebAssembly::getLibcallSignature(Subtarget, Name, Returns, Params);

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 5c3127e2d3dc6..e05b23d255894 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -366,7 +366,14 @@ bool 
WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
           const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
           Register NewReg = MRI.createVirtualRegister(RC);
           auto InsertPt = std::next(MI.getIterator());
-          if (UseEmpty[OldReg.virtRegIndex()]) {
+          // When libcalls are emitted for thread context, the frame base vreg
+          // has an implicit use in the DW_AT_frame_base debug info, so we
+          // should not remove it.
+          bool NeedsRegForDebug =
+              MFI.isFrameBaseVirtual() && OldReg == MFI.getFrameBaseVreg() &&
+              MF.getFunction().getSubprogram() &&
+              
MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext();
+          if (UseEmpty[OldReg.virtRegIndex()] && !NeedsRegForDebug) {
             unsigned Opc = getDropOpcode(RC);
             MachineInstr *Drop =
                 BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index e0e72316b7e2c..50f820086c5a6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -135,13 +135,19 @@ bool WebAssemblyFrameLowering::needsSPForLocalFrame(
       any_of(MRI.use_operands(getSPReg(MF)),
              [](MachineOperand &MO) { return !MO.isImplicit(); });
 
+  // With libcall thread context, we need SP in the prolog when debug
+  // info is present so we can allocate a local for DWARF to reference.
+  bool NeedsSPForDebug =
+      MF.getFunction().getSubprogram() &&
+      MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext();
+
   return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF) ||
-         HasExplicitSPUse;
+         HasExplicitSPUse || NeedsSPForDebug;
 }
 
 // In function with EH pads, we need to make a copy of the value of
-// __stack_pointer global in SP32/64 register, in order to use it when
-// restoring __stack_pointer after an exception is caught.
+// the stack pointer in the SP32/64 register, in order to use it when
+// restoring the stack pointer after an exception is caught.
 bool WebAssemblyFrameLowering::needsPrologForEH(
     const MachineFunction &MF) const {
   auto EHType = MF.getTarget().getMCAsmInfo().getExceptionHandlingType();
@@ -151,15 +157,16 @@ bool WebAssemblyFrameLowering::needsPrologForEH(
 
 /// Returns true if this function needs a local user-space stack pointer.
 /// Unlike a machine stack pointer, the wasm user stack pointer is a global
-/// variable, so it is loaded into a register in the prolog.
+/// variable or managed by library calls, so it is loaded
+/// into a register in the prolog.
 bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF) const {
   return needsSPForLocalFrame(MF) || needsPrologForEH(MF);
 }
 
 /// Returns true if the local user-space stack pointer needs to be written back
-/// to __stack_pointer global by this function (this is not meaningful if
-/// needsSP is false). If false, the stack red zone can be used and only a 
local
-/// SP is needed.
+/// to the stack pointer global/thread context by this function (this is not
+/// meaningful if needsSP is false). If false, the stack red zone can be used
+/// and only a local SP is needed.
 bool WebAssemblyFrameLowering::needsSPWriteback(
     const MachineFunction &MF) const {
   auto &MFI = MF.getFrameInfo();
@@ -227,17 +234,25 @@ WebAssemblyFrameLowering::getOpcGlobSet(const 
MachineFunction &MF) {
              : WebAssembly::GLOBAL_SET_I32;
 }
 
-void WebAssemblyFrameLowering::writeSPToGlobal(
+void WebAssemblyFrameLowering::writeBackSP(
     unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const {
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
 
-  const char *ES = "__stack_pointer";
-  auto *SPSymbol = MF.createExternalSymbolName(ES);
+  if (MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext()) {
+    const char *ES = "__wasm_set_stack_pointer";
+    auto *SPSymbol = MF.createExternalSymbolName(ES);
+    BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::CALL))
+        .addExternalSymbol(SPSymbol)
+        .addReg(SrcReg);
+  } else {
+    const char *ES = "__stack_pointer";
+    auto *SPSymbol = MF.createExternalSymbolName(ES);
 
-  BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF)))
-      .addExternalSymbol(SPSymbol)
-      .addReg(SrcReg);
+    BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF)))
+        .addExternalSymbol(SPSymbol)
+        .addReg(SrcReg);
+  }
 }
 
 MachineBasicBlock::iterator
@@ -251,7 +266,7 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
   if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
       needsSPWriteback(MF)) {
     DebugLoc DL = I->getDebugLoc();
-    writeSPToGlobal(getSPReg(MF), MF, MBB, I, DL);
+    writeBackSP(getSPReg(MF), MF, MBB, I, DL);
   }
   return MBB.erase(I);
 }
@@ -283,10 +298,17 @@ void 
WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
   if (StackSize)
     SPReg = MRI.createVirtualRegister(PtrRC);
 
-  const char *ES = "__stack_pointer";
-  auto *SPSymbol = MF.createExternalSymbolName(ES);
-  BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg)
-      .addExternalSymbol(SPSymbol);
+  if (ST.hasLibcallThreadContext()) {
+    const char *ES = "__wasm_get_stack_pointer";
+    auto *SPSymbol = MF.createExternalSymbolName(ES);
+    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CALL), SPReg)
+        .addExternalSymbol(SPSymbol);
+  } else {
+    const char *ES = "__stack_pointer";
+    auto *SPSymbol = MF.createExternalSymbolName(ES);
+    BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg)
+        .addExternalSymbol(SPSymbol);
+  }
 
   bool HasBP = hasBP(MF);
   if (HasBP) {
@@ -322,7 +344,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction 
&MF,
         .addReg(getSPReg(MF));
   }
   if (StackSize && needsSPWriteback(MF)) {
-    writeSPToGlobal(getSPReg(MF), MF, MBB, InsertPt, DL);
+    writeBackSP(getSPReg(MF), MF, MBB, InsertPt, DL);
   }
 }
 
@@ -364,7 +386,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction 
&MF,
     SPReg = SPFPReg;
   }
 
-  writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
+  writeBackSP(SPReg, MF, MBB, InsertPt, DL);
 }
 
 bool WebAssemblyFrameLowering::isSupportedStackID(
@@ -386,6 +408,11 @@ WebAssemblyFrameLowering::getDwarfFrameBase(const 
MachineFunction &MF) const {
     unsigned LocalNum = MFI.getFrameBaseLocal();
     Loc.Location.WasmLoc = {WebAssembly::TI_LOCAL, LocalNum};
   } else {
+    // There is no __stack_pointer global in libcall thread context mode, so
+    // TI_GLOBAL_RELOC would produce a bogus relocation. We take care to ensure
+    // that this code is not reached in that case, but assert here to be sure.
+    assert(!MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext());
+
     // TODO: This should work on a breakpoint at a function with no frame,
     // but probably won't work for traversing up the stack.
     Loc.Location.WasmLoc = {WebAssembly::TI_GLOBAL_RELOC, 0};

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h 
b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index 710d5173d64db..f836f4e95a93b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -23,7 +23,7 @@ class WebAssemblyFrameLowering final : public 
TargetFrameLowering {
 public:
   /// Size of the red zone for the user stack (leaf functions can use this much
   /// space below the stack pointer without writing it back to __stack_pointer
-  /// global).
+  /// global/__wasm_set_stack_pointer).
   // TODO: (ABI) Revisit and decide how large it should be.
   static const size_t RedZoneSize = 128;
 
@@ -47,11 +47,10 @@ class WebAssemblyFrameLowering final : public 
TargetFrameLowering {
 
   bool needsPrologForEH(const MachineFunction &MF) const;
 
-  /// Write SP back to __stack_pointer global.
-  void writeSPToGlobal(unsigned SrcReg, MachineFunction &MF,
-                       MachineBasicBlock &MBB,
-                       MachineBasicBlock::iterator &InsertStore,
-                       const DebugLoc &DL) const;
+  /// Write SP back to __stack_pointer global, or call 
__wasm_set_stack_pointer.
+  void writeBackSP(unsigned SrcReg, MachineFunction &MF, MachineBasicBlock 
&MBB,
+                   MachineBasicBlock::iterator &InsertStore,
+                   const DebugLoc &DL) const;
 
   // Returns the index of the WebAssembly local to which the stack object
   // FrameIndex in MF should be allocated, or std::nullopt.

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 510be1ca64fa8..c7b57588877b7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -337,10 +337,8 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
     MVT PtrVT = TLI.getPointerTy(CurDAG->getDataLayout());
     switch (IntNo) {
     case Intrinsic::wasm_tls_base: {
-      MachineSDNode *TLSBase = CurDAG->getMachineNode(
-          GlobalGetIns, DL, PtrVT, MVT::Other,
-          CurDAG->getTargetExternalSymbol("__tls_base", PtrVT),
-          Node->getOperand(0));
+      MachineSDNode *TLSBase = llvm::WebAssembly::getTLSBase(
+          *CurDAG, DL, Subtarget, Node->getOperand(0));
       ReplaceNode(Node, TLSBase);
       return;
     }

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index dda91ac19b44a..7f22dc0fed135 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2087,17 +2087,11 @@ 
WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
       model == GlobalValue::LocalDynamicTLSModel ||
       (model == GlobalValue::GeneralDynamicTLSModel &&
        getTargetMachine().shouldAssumeDSOLocal(GV))) {
-    // For DSO-local TLS variables we use offset from __tls_base
+    // For DSO-local TLS variables we use offset from __tls_base, or
+    // __wasm_get_tls_base() if using libcall thread context.
 
     MVT PtrVT = getPointerTy(DAG.getDataLayout());
-    auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
-                                       : WebAssembly::GLOBAL_GET_I32;
-    const char *BaseName = MF.createExternalSymbolName("__tls_base");
-
-    SDValue BaseAddr(
-        DAG.getMachineNode(GlobalGet, DL, PtrVT,
-                           DAG.getTargetExternalSymbol(BaseName, PtrVT)),
-        0);
+    SDValue BaseAddr(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0);
 
     SDValue TLSOffset = DAG.getTargetGlobalAddress(
         GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
@@ -2283,14 +2277,7 @@ SDValue 
WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
   }
 
   case Intrinsic::thread_pointer: {
-    MVT PtrVT = getPointerTy(DAG.getDataLayout());
-    auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
-                                       : WebAssembly::GLOBAL_GET_I32;
-    const char *TlsBase = MF.createExternalSymbolName("__tls_base");
-    return SDValue(
-        DAG.getMachineNode(GlobalGet, DL, PtrVT,
-                           DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
-        0);
+    return SDValue(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0);
   }
   }
 }

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index 664f0f2e25ffc..cb96e313e5f1a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -376,8 +376,8 @@ bool 
WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
 }
 
 // After the stack is unwound due to a thrown exception, the __stack_pointer
-// global can point to an invalid address. This inserts instructions that
-// restore __stack_pointer global.
+// global/__wasm_get_stack_pointer() can point to an invalid address. This
+// inserts instructions that restore the stack pointer state.
 bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
   const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
       MF.getSubtarget().getFrameLowering());
@@ -390,11 +390,11 @@ bool 
WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
       continue;
     Changed = true;
 
-    // Insert __stack_pointer restoring instructions at the beginning of each 
EH
+    // Insert stack pointer restoring instructions at the beginning of each EH
     // pad, after the catch instruction. Here it is safe to assume that SP32
-    // holds the latest value of __stack_pointer, because the only exception 
for
-    // this case is when a function uses the red zone, but that only happens
-    // with leaf functions, and we don't restore __stack_pointer in leaf
+    // holds the latest value of the stack pointer, because the only exception
+    // for this case is when a function uses the red zone, but that only 
happens
+    // with leaf functions, and we don't restore the stack pointer in leaf
     // functions anyway.
     auto InsertPos = MBB.begin();
     // Skip EH_LABELs in the beginning of an EH pad if present.
@@ -404,8 +404,8 @@ bool 
WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
            WebAssembly::isCatch(InsertPos->getOpcode()) &&
            "catch/catch_all should be present in every EH pad at this point");
     ++InsertPos; // Skip the catch instruction
-    FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB,
-                                   InsertPos, MBB.begin()->getDebugLoc());
+    FrameLowering->writeBackSP(FrameLowering->getSPReg(MF), MF, MBB, InsertPos,
+                               MBB.begin()->getDebugLoc());
   }
   return Changed;
 }

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 97f2ed0a828ba..9015ceab87fb7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -251,6 +251,10 @@ static void query(const MachineInstr &MI, bool &Read, bool 
&Write,
       !strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer"))
     StackPointer = true;
 
+  if (MI.isCall() && MI.getOperand(0).isSymbol() &&
+      !strcmp(MI.getOperand(0).getSymbolName(), "__wasm_get_stack_pointer"))
+    StackPointer = true;
+
   // Analyze calls.
   if (MI.isCall()) {
     queryCallee(MI, Read, Write, Effects, StackPointer);
@@ -287,17 +291,28 @@ static MachineInstr *getVRegDef(unsigned Reg, const 
MachineInstr *Insert,
 // generalization of MachineRegisterInfo::hasOneNonDBGUse that uses
 // LiveIntervals to handle complex cases in optimized code.
 static bool hasSingleUse(unsigned Reg, MachineRegisterInfo &MRI,
-                         WebAssemblyFunctionInfo &MFI, bool Optimize,
+                         const MachineFunction &MF, bool Optimize,
                          MachineInstr *Def, LiveIntervals *LIS) {
+  auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+  // The frame base always has an implicit DBG use as DW_AT_frame_base.
+  if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) {
+    // When using global thread context, the frame base can be encoded
+    // as an offset from __stack_pointer, so the vreg can be stackified.
+    // However, when using libcall thread context, we need to keep the frame
+    // base vreg around if debug info is enabled, because there is no
+    // global to refer to.
+    bool NeedsRegForDebug =
+        MF.getFunction().getSubprogram() &&
+        MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext();
+    if (!Optimize || NeedsRegForDebug)
+      return false;
+  }
   if (!Optimize) {
     // Using "hasOneUse" instead of "hasOneNonDBGUse" here because we don't
     // want to stackify DBG_VALUE operands - WASM stack locations are less
     // useful and less widely supported than WASM local locations.
     if (!MRI.hasOneUse(Reg))
       return false;
-    // The frame base always has an implicit DBG use as DW_AT_frame_base.
-    if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg)
-      return false;
     return true;
   }
 
@@ -918,7 +933,7 @@ bool 
WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
         bool CanMove = SameBlock &&
                        isSafeToMove(Def, &Use, Insert, MFI, MRI, Optimize) &&
                        !TreeWalker.isOnStack(Reg);
-        if (CanMove && hasSingleUse(Reg, MRI, MFI, Optimize, DefI, LIS)) {
+        if (CanMove && hasSingleUse(Reg, MRI, MF, Optimize, DefI, LIS)) {
           Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, 
MRI);
 
           // If we are removing the frame base reg completely, remove the debug
@@ -960,7 +975,7 @@ bool 
WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
           Register UseReg = SubsequentUse->getReg();
           // TODO: This single-use restriction could be relaxed by using tees
           if (DefReg != UseReg ||
-              !hasSingleUse(DefReg, MRI, MFI, Optimize, nullptr, nullptr))
+              !hasSingleUse(DefReg, MRI, MF, Optimize, nullptr, nullptr))
             break;
           MFI.stackifyVReg(MRI, DefReg);
           ++SubsequentDef;

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 641eef73044cd..6326b7d76db82 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -40,6 +40,10 @@ 
WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU,
 
   ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
 
+  // WASIP3 implies using the libcall thread context.
+  if (TargetTriple.getOS() == Triple::WASIp3)
+    HasLibcallThreadContext = true;
+
   FeatureBitset Bits = getFeatureBits();
 
   // bulk-memory implies bulk-memory-opt

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h 
b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 798dea25ef5e6..5c6f4cb5b36ff 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -52,6 +52,7 @@ class WebAssemblySubtarget final : public 
WebAssemblyGenSubtargetInfo {
   bool HasExtendedConst = false;
   bool HasFP16 = false;
   bool HasGC = false;
+  bool HasLibcallThreadContext = false;
   bool HasMultiMemory = false;
   bool HasMultivalue = false;
   bool HasMutableGlobals = false;
@@ -116,6 +117,7 @@ class WebAssemblySubtarget final : public 
WebAssemblyGenSubtargetInfo {
   bool hasExtendedConst() const { return HasExtendedConst; }
   bool hasFP16() const { return HasFP16; }
   bool hasGC() const { return HasGC; }
+  bool hasLibcallThreadContext() const { return HasLibcallThreadContext; }
   bool hasMultiMemory() const { return HasMultiMemory; }
   bool hasMultivalue() const { return HasMultivalue; }
   bool hasMutableGlobals() const { return HasMutableGlobals; }

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index f533e839fe6d0..1361dd99b7072 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -410,7 +410,7 @@ class CoalesceFeaturesAndStripAtomics final : public 
ModulePass {
     // Code compiled without atomics or bulk-memory may have had its atomics or
     // thread-local data lowered to nonatomic operations or non-thread-local
     // data. In that case, we mark the pseudo-feature "shared-mem" as 
disallowed
-    // to tell the linker that it would be unsafe to allow this code ot be used
+    // to tell the linker that it would be unsafe to allow this code to be used
     // in a module with shared memory.
     if (Stripped) {
       M.addModuleFlag(Module::ModFlagBehavior::Error, 
"wasm-feature-shared-mem",

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index 890486778e700..ac8df67fe7557 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -194,3 +194,26 @@ bool WebAssembly::canLowerReturn(size_t ResultSize,
                                  const WebAssemblySubtarget *Subtarget) {
   return ResultSize <= 1 || canLowerMultivalueReturn(Subtarget);
 }
+
+MachineSDNode *WebAssembly::getTLSBase(SelectionDAG &DAG, const SDLoc &DL,
+                                       const WebAssemblySubtarget *Subtarget,
+                                       SDValue Chain) {
+  MVT PtrVT = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
+
+  unsigned Opcode;
+  const char *SymName;
+  if (Subtarget->hasLibcallThreadContext()) {
+    Opcode = WebAssembly::CALL;
+    SymName = "__wasm_get_tls_base";
+  } else {
+    Opcode = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
+                               : WebAssembly::GLOBAL_GET_I32;
+    SymName = "__tls_base";
+  }
+
+  SDValue Sym = DAG.getTargetExternalSymbol(SymName, PtrVT);
+
+  if (Chain.getNode())
+    return DAG.getMachineNode(Opcode, DL, {PtrVT, MVT::Other}, {Sym, Chain});
+  return DAG.getMachineNode(Opcode, DL, PtrVT, Sym);
+}

diff  --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h 
b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
index 046b1b5db2a79..0827791d93657 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
 #define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
 
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Support/CommandLine.h"
 
 namespace llvm {
@@ -27,6 +28,9 @@ class MCSymbolWasm;
 class TargetRegisterClass;
 class WebAssemblyFunctionInfo;
 class WebAssemblySubtarget;
+class MachineSDNode;
+class SDLoc;
+class SelectionDAG;
 
 namespace WebAssembly {
 
@@ -73,6 +77,13 @@ bool canLowerMultivalueReturn(const WebAssemblySubtarget 
*Subtarget);
 /// memory.
 bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget);
 
+// Get the TLS base value for the current target
+// If using libcall thread context, calls
+// __wasm_get_tls_base, otherwise, global.get __tls_base
+MachineSDNode *getTLSBase(SelectionDAG &DAG, const SDLoc &DL,
+                          const WebAssemblySubtarget *Subtarget,
+                          const SDValue Chain = SDValue());
+
 } // end namespace WebAssembly
 
 } // end namespace llvm

diff  --git a/llvm/test/CodeGen/WebAssembly/stack-abi.ll 
b/llvm/test/CodeGen/WebAssembly/stack-abi.ll
new file mode 100644
index 0000000000000..684abb9d80028
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/stack-abi.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s --mtriple=wasm32-wasip3 -asm-verbose=false | FileCheck 
--check-prefix=LIBCALL %s
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false | 
FileCheck --check-prefix=GLOBAL %s
+
+declare void @force_sp_save()
+define void @use_stack() #0 {
+  %1 = alloca i32, align 4
+  %2 = alloca ptr, align 4
+  store ptr %1, ptr %2, align 4
+  call void @force_sp_save()
+  ret void
+}
+
+; LIBCALL-LABEL: use_stack:
+; LIBCALL: call __wasm_get_stack_pointer
+; LIBCALL: call __wasm_set_stack_pointer
+; LIBCALL-NOT: global.get __stack_pointer
+; LIBCALL-NOT: global.set __stack_pointer
+
+; GLOBAL-LABEL: use_stack:
+; GLOBAL: global.get __stack_pointer
+; GLOBAL: global.set __stack_pointer
+; GLOBAL-NOT: call __wasm_get_stack_pointer
+; GLOBAL-NOT: call __wasm_set_stack_pointer
+

diff  --git a/llvm/test/CodeGen/WebAssembly/thread_pointer.ll 
b/llvm/test/CodeGen/WebAssembly/thread_pointer.ll
index 18716988673db..875f0f4c84c39 100644
--- a/llvm/test/CodeGen/WebAssembly/thread_pointer.ll
+++ b/llvm/test/CodeGen/WebAssembly/thread_pointer.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=wasm32-unknown-unknown | FileCheck %s 
--check-prefix=WASM32
 ; RUN: llc < %s -mtriple=wasm64-unknown-unknown | FileCheck %s 
--check-prefix=WASM64
+; RUN: llc < %s -mtriple=wasm32-wasip3 | FileCheck %s 
--check-prefix=WASM32-LIBCALL
 
 declare ptr @llvm.thread.pointer()
 
@@ -16,6 +17,13 @@ define ptr @thread_pointer() nounwind {
 ; WASM64-NEXT:  # %bb.0:
 ; WASM64-NEXT:    global.get __tls_base
 ; WASM64-NEXT:    # fallthrough-return
+;
+; WASM32-LIBCALL-LABEL: thread_pointer:
+; WASM32-LIBCALL:         .functype thread_pointer () -> (i32)
+; WASM32-LIBCALL-NEXT:  # %bb.0:
+; WASM32-LIBCALL-NEXT:    call __wasm_get_tls_base
+; WASM32-LIBCALL-NEXT:    # fallthrough-return
+;
   %1 = tail call ptr @llvm.thread.pointer()
   ret ptr %1
 }

diff  --git a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll 
b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll
index dc0d40c7973ad..1807ea2263338 100644
--- a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll
+++ b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll
@@ -1,13 +1,16 @@
 ; Run the tests with the `localexec` TLS mode specified.
 ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals 
-mattr=+bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,TLS %s
 ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals 
-mattr=+bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS 
%s
+; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc 
-mtriple=wasm32-wasip3 -asm-verbose=false -disable-wasm-fallthrough-return-opt 
-wasm-disable-explicit-locals -mattr=bulk-memory,atomics -fast-isel - | 
FileCheck --check-prefixes=CHECK,TLS-LIBCALL %s
 
 ; Also, run the same tests without a specified TLS mode--this should still 
emit `localexec` code on non-Emscripten targtes which don't currently support 
dynamic linking.
 ; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -asm-verbose=false 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals 
-mattr=+bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,TLS %s
 ; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -asm-verbose=false 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals 
-mattr=+bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS 
%s
+; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -mtriple=wasm32-wasip3 
-asm-verbose=false -disable-wasm-fallthrough-return-opt 
-wasm-disable-explicit-locals -mattr=bulk-memory,atomics -fast-isel - | 
FileCheck --check-prefixes=CHECK,TLS-LIBCALL %s
 
 ; Finally, when bulk memory is disabled, no TLS code should be generated.
 ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals 
-mattr=-bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,NO-TLS %s
+; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc 
-mtriple=wasm32-wasip3 -asm-verbose=false -disable-wasm-fallthrough-return-opt 
-wasm-disable-explicit-locals -mattr=-bulk-memory,atomics - | FileCheck 
--check-prefixes=CHECK,NO-TLS %s
 target triple = "wasm32-unknown-unknown"
 
 ; CHECK-LABEL: address_of_tls:
@@ -18,6 +21,11 @@ define i32 @address_of_tls() {
   ; TLS-NEXT: i32.add
   ; TLS-NEXT: return
 
+  ; TLS-LIBCALL-DAG: call __wasm_get_tls_base
+  ; TLS-LIBCALL-DAG: i32.const tls@TLSREL
+  ; TLS-LIBCALL-NEXT: i32.add
+  ; TLS-LIBCALL-NEXT: return
+
   ; NO-TLS-NEXT: i32.const tls
   ; NO-TLS-NEXT: return
   %p = call ptr @llvm.threadlocal.address.p0(ptr @tls)
@@ -33,6 +41,11 @@ define i32 @address_of_tls_external() {
   ; TLS-NEXT: i32.add
   ; TLS-NEXT: return
 
+  ; TLS-LIBCALL-DAG: call __wasm_get_tls_base
+  ; TLS-LIBCALL-DAG: i32.const tls_external@TLSREL
+  ; TLS-LIBCALL-NEXT: i32.add
+  ; TLS-LIBCALL-NEXT: return
+
   ; NO-TLS-NEXT: i32.const tls_external
   ; NO-TLS-NEXT: return
   %p = call ptr @llvm.threadlocal.address.p0(ptr @tls_external)
@@ -48,6 +61,11 @@ define ptr @ptr_to_tls() {
   ; TLS-NEXT: i32.add
   ; TLS-NEXT: return
 
+  ; TLS-LIBCALL-DAG: call __wasm_get_tls_base
+  ; TLS-LIBCALL-DAG: i32.const tls@TLSREL
+  ; TLS-LIBCALL-NEXT: i32.add
+  ; TLS-LIBCALL-NEXT: return
+
   ; NO-TLS-NEXT: i32.const tls
   ; NO-TLS-NEXT: return
   %p = call ptr @llvm.threadlocal.address.p0(ptr @tls)
@@ -63,6 +81,12 @@ define i32 @tls_load() {
   ; TLS-NEXT: i32.load 0
   ; TLS-NEXT: return
 
+  ; TLS-LIBCALL-DAG: call __wasm_get_tls_base
+  ; TLS-LIBCALL-DAG: i32.const tls@TLSREL
+  ; TLS-LIBCALL-NEXT: i32.add
+  ; TLS-LIBCALL-NEXT: i32.load 0
+  ; TLS-LIBCALL-NEXT: return
+
   ; NO-TLS-NEXT: i32.const 0
   ; NO-TLS-NEXT: i32.load tls
   ; NO-TLS-NEXT: return
@@ -80,6 +104,12 @@ define void @tls_store(i32 %x) {
   ; TLS-NEXT: i32.store 0
   ; TLS-NEXT: return
 
+  ; TLS-LIBCALL-DAG: call __wasm_get_tls_base
+  ; TLS-LIBCALL-DAG: i32.const tls@TLSREL
+  ; TLS-LIBCALL-NEXT: i32.add
+  ; TLS-LIBCALL-NEXT: i32.store 0
+  ; TLS-LIBCALL-NEXT: return
+
   ; NO-TLS-NEXT: i32.const 0
   ; NO-TLS-NEXT: i32.store tls
   ; NO-TLS-NEXT: return
@@ -99,6 +129,7 @@ define i32 @tls_size() {
 
 ; CHECK: .type tls,@object
 ; TLS-NEXT: .section .tbss.tls,"T",@
+; TLS-LIBCALL-NEXT: .section .tbss.tls,"T",@
 ; NO-TLS-NEXT: .section .bss.tls,"",@
 ; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: tls:

diff  --git a/llvm/test/DebugInfo/WebAssembly/thread-context-abi.ll 
b/llvm/test/DebugInfo/WebAssembly/thread-context-abi.ll
new file mode 100644
index 0000000000000..641529c209559
--- /dev/null
+++ b/llvm/test/DebugInfo/WebAssembly/thread-context-abi.ll
@@ -0,0 +1,39 @@
+; Ensure that using libcall thread context with an empty function produces a 
frame base 
+; that uses a local, and that using the global thread context produces a frame 
base that 
+; uses the __stack_pointer global.
+
+; Test generated via: clang --target=wasm32-unknown-unknown-wasm foo.c -g -O2
+; void foo() {}
+
+; RUN: llc < %s -filetype=obj -mtriple=wasm32-wasip3 -o - | llvm-dwarfdump - | 
FileCheck %s --check-prefix=LIBCALL
+; RUN: llc < %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s 
--check-prefix=GLOBAL
+
+target datalayout = 
"e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
+target triple = "wasm32-unknown-unknown"
+
+define hidden void @foo() local_unnamed_addr #0 !dbg !9 {
+  ret void
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn 
memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="generic" 
"target-features"="+bulk-memory,+bulk-memory-opt,+call-indirect-overlong,+multivalue,+mutable-globals,+nontrapping-fptoint,+reference-types,+sign-ext"
 }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+!llvm.errno.tbaa = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang 
version 23.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, 
splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang version 23.0.0"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = distinct !DISubprogram(name: "caller", scope: !1, file: !1, line: 2, 
type: !10, scopeLine: 2, flags: DIFlagAllCallsDescribed, spFlags: 
DISPFlagDefinition | DISPFlagOptimized, unit: !0, keyInstructions: true)
+!10 = !DISubroutineType(types: !11)
+!11 = !{null}
+
+; LIBCALL: DW_AT_frame_base        (DW_OP_WASM_location 0x0 0x0, 
DW_OP_stack_value)
+; GLOBAL: DW_AT_frame_base        (DW_OP_WASM_location 0x3 0x0, 
DW_OP_stack_value)


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to