Author: Jonas Devlieghere
Date: 2025-08-12T15:12:30-05:00
New Revision: 5be2063e1083773d4863d94d357f0e890c646fde

URL: 
https://github.com/llvm/llvm-project/commit/5be2063e1083773d4863d94d357f0e890c646fde
DIFF: 
https://github.com/llvm/llvm-project/commit/5be2063e1083773d4863d94d357f0e890c646fde.diff

LOG: [lldb] Support parsing the Wasm symbol table (#153093)

This PR adds support for parsing the WebAssembly symbol table. The
symbol table is encoded in the custom "name" section and contains names and
indexes into other sections. For now we only support parsing function
(code) symbols. The result is that you can set breakpoints by symbol
name, while previously breakpoints by name required debug info (DWARF).

This is also necessary for Swift, which checks for the presence of
`swift_release` as a heuristic to determine if there's a static Swift
stdlib.

Added: 
    lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
    lldb/test/Shell/Symtab/symtab-wasm.test

Modified: 
    lldb/include/lldb/lldb-enumerations.h
    lldb/source/Core/Section.cpp
    lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
    lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
    lldb/source/Symbol/ObjectFile.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index c63c1f03e58da..fec9fdef44df9 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -777,6 +777,7 @@ enum SectionType {
   eSectionTypeLLDBTypeSummaries,
   eSectionTypeLLDBFormatters,
   eSectionTypeSwiftModules,
+  eSectionTypeWasmName,
 };
 
 FLAGS_ENUM(EmulateInstructionOptions){

diff  --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp
index 27dcf987b0278..02d9d86fe5374 100644
--- a/lldb/source/Core/Section.cpp
+++ b/lldb/source/Core/Section.cpp
@@ -153,6 +153,8 @@ const char *Section::GetTypeAsCString() const {
     return "lldb-formatters";
   case eSectionTypeSwiftModules:
     return "swift-modules";
+  case eSectionTypeWasmName:
+    return "wasm-name";
   case eSectionTypeOther:
     return "regular";
   }
@@ -415,6 +417,7 @@ bool Section::ContainsOnlyDebugInfo() const {
   case eSectionTypeCompactUnwind:
   case eSectionTypeGoSymtab:
   case eSectionTypeAbsoluteAddress:
+  case eSectionTypeWasmName:
   case eSectionTypeOther:
   // Used for "__dof_cache" in mach-o or ".debug" for COFF which isn't debug
   // information that we parse at all. This was causing system files with no

diff  --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 13df6e2f26b53..d7cb60e3f0c38 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1156,6 +1156,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) {
         case eSectionTypeDataObjCMessageRefs:
         case eSectionTypeDataObjCCFStrings:
         case eSectionTypeGoSymtab:
+        case eSectionTypeWasmName:
           return AddressClass::eData;
 
         case eSectionTypeDebug:

diff  --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index b1efd25949379..a489b05acfcb4 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/Support/CheckedArithmetic.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Format.h"
 #include <optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
   return version == llvm::wasm::WasmVersion;
 }
 
-static std::optional<ConstString>
+// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
+static std::optional<std::string>
 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
   // A Wasm string is encoded as a vector of UTF-8 codes.
   // Vectors are encoded with their u32 length followed by the element
@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
     return std::nullopt;
   }
 
-  llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));
-  return ConstString(str);
+  return std::string(toStringRef(llvm::ArrayRef(str_storage)));
 }
 
 char ObjectFileWasm::ID;
@@ -182,7 +183,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
     // identifying the custom section, followed by an uninterpreted sequence
     // of bytes.
     lldb::offset_t prev_offset = c.tell();
-    std::optional<ConstString> sect_name = GetWasmString(data, c);
+    std::optional<std::string> sect_name = GetWasmString(data, c);
     if (!sect_name)
       return false;
 
@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
 
     uint32_t section_length = payload_len - (c.tell() - prev_offset);
     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
-                                        section_id, *sect_name});
+                                        section_id, ConstString(*sect_name)});
     *offset_ptr += (c.tell() + section_length);
   } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
     m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
@@ -248,12 +249,136 @@ bool ObjectFileWasm::ParseHeader() {
   return true;
 }
 
-void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
+static llvm::Expected<std::vector<AddressRange>>
+ParseFunctions(SectionSP code_section_sp) {
+  DataExtractor code_section_data;
+  code_section_sp->GetSectionData(code_section_data);
+  lldb::offset_t offset = 0;
+
+  const uint64_t function_count = code_section_data.GetULEB128(&offset);
+  if (function_count >= std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("function count overflows uint32_t");
+
+  std::vector<AddressRange> functions;
+  functions.reserve(function_count);
+
+  for (uint32_t i = 0; i < function_count; ++i) {
+    const uint64_t function_size = code_section_data.GetULEB128(&offset);
+    if (function_size >= std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("function size overflows uint32_t");
+    // llvm-objdump considers the ULEB with the function size to be part of the
+    // function. We can't do that here because that would break symbolic
+    // breakpoints, as that address is never executed.
+    functions.emplace_back(code_section_sp, offset, function_size);
+
+    std::optional<lldb::offset_t> next_offset =
+        llvm::checkedAddUnsigned(offset, function_size);
+    if (!next_offset)
+      return llvm::createStringError("function offset overflows uint64_t");
+    offset = *next_offset;
+  }
+
+  return functions;
+}
+
+static llvm::Expected<std::vector<Symbol>>
+ParseNames(SectionSP name_section_sp,
+           const std::vector<AddressRange> &functions) {
+  DataExtractor name_section_data;
+  name_section_sp->GetSectionData(name_section_data);
+
+  llvm::DataExtractor data = name_section_data.GetAsLLVM();
+  llvm::DataExtractor::Cursor c(0);
+  std::vector<Symbol> symbols;
+  while (c && c.tell() < data.size()) {
+    const uint8_t type = data.getU8(c);
+    const uint64_t size = data.getULEB128(c);
+    if (size >= std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("size overflows uint32_t");
+
+    switch (type) {
+    case llvm::wasm::WASM_NAMES_FUNCTION: {
+      const uint64_t count = data.getULEB128(c);
+      if (count >= std::numeric_limits<uint32_t>::max())
+        return llvm::createStringError("function count overflows uint32_t");
+
+      for (uint64_t i = 0; c && i < count; ++i) {
+        const uint64_t idx = data.getULEB128(c);
+        const std::optional<std::string> name = GetWasmString(data, c);
+        if (!name || idx >= functions.size())
+          continue;
+        symbols.emplace_back(
+            symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
+            /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
+            /*is_artificial=*/false, functions[idx],
+            /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
+            /*flags=*/0);
+      }
+    } break;
+    case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
+    case llvm::wasm::WASM_NAMES_GLOBAL:
+    case llvm::wasm::WASM_NAMES_LOCAL:
+    default:
+      std::optional<uint64_t> offset = llvm::checkedAddUnsigned(c.tell(), size);
+      if (!offset)
+        return llvm::createStringError("offset overflows uint64_t");
+      c.seek(*offset);
+    }
+  }
+
+  if (!c)
+    return c.takeError();
+
+  return symbols;
+}
+
+void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
+  assert(m_sections_up && "sections must be parsed");
+  Log *log = GetLog(LLDBLog::Object);
+
+  // The name section contains names and indexes. First parse the functions from
+  // the code section so we can access them by their index.
+  SectionSP code_section_sp =
+      m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
+  if (!code_section_sp) {
+    LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
+    return;
+  }
+
+  llvm::Expected<std::vector<AddressRange>> functions =
+      ParseFunctions(code_section_sp);
+  if (!functions) {
+    LLDB_LOG_ERROR(log, functions.takeError(),
+                   "Failed to parse Wasm functions: {0}");
+    return;
+  }
+
+  // Parse the name section.
+  SectionSP name_section_sp =
+      m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false);
+  if (!name_section_sp) {
+    LLDB_LOG(log, "Failed to parse Wasm symbol table: no names section");
+    return;
+  }
+
+  llvm::Expected<std::vector<Symbol>> symbols =
+      ParseNames(name_section_sp, *functions);
+  if (!symbols) {
+    LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
+    return;
+  }
+
+  for (const Symbol &symbol : *symbols)
+    symtab.AddSymbol(symbol);
+
+  symtab.Finalize();
+}
 
 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
-  if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
+  if (Name == "name")
+    return lldb::eSectionTypeWasmName;
+  if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_"))
     return ObjectFile::GetDWARFSectionTypeFromName(Name);
-  }
   return eSectionTypeOther;
 }
 
@@ -397,9 +522,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
           ReadImageData(sect_info.offset, kBufferSize);
       llvm::DataExtractor data = section_header_data.GetAsLLVM();
       llvm::DataExtractor::Cursor c(0);
-      std::optional<ConstString> symbols_url = GetWasmString(data, c);
+      std::optional<std::string> symbols_url = GetWasmString(data, c);
       if (symbols_url)
-        return FileSpec(symbols_url->GetStringRef());
+        return FileSpec(*symbols_url);
     }
   }
   return std::nullopt;

diff  --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp
index 21daf7476b522..7efce2a035505 100644
--- a/lldb/source/Symbol/ObjectFile.cpp
+++ b/lldb/source/Symbol/ObjectFile.cpp
@@ -379,6 +379,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) {
           case eSectionTypeELFDynamicSymbols:
           case eSectionTypeELFRelocationEntries:
           case eSectionTypeELFDynamicLinkInfo:
+          case eSectionTypeWasmName:
           case eSectionTypeOther:
             return AddressClass::eUnknown;
           case eSectionTypeAbsoluteAddress:

diff  --git a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
new file mode 100644
index 0000000000000..165bb53662f40
--- /dev/null
+++ b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
@@ -0,0 +1,210 @@
+--- !WASM
+FileHeader:
+  Version:         0x1
+Sections:
+  - Type:            TYPE
+    Signatures:
+      - Index:           0
+        ParamTypes:      []
+        ReturnTypes:     []
+      - Index:           1
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+      - Index:           2
+        ParamTypes:      []
+        ReturnTypes:
+          - I32
+  - Type:            FUNCTION
+    FunctionTypes:   [ 0, 1, 2, 1 ]
+  - Type:            TABLE
+    Tables:
+      - Index:           0
+        ElemType:        FUNCREF
+        Limits:
+          Flags:           [ HAS_MAX ]
+          Minimum:         0x1
+          Maximum:         0x1
+  - Type:            MEMORY
+    Memories:
+      - Minimum:         0x2
+  - Type:            GLOBAL
+    Globals:
+      - Index:           0
+        Type:            I32
+        Mutable:         true
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           66560
+      - Index:           1
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           1024
+      - Index:           2
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           1024
+      - Index:           3
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           1024
+      - Index:           4
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           66560
+      - Index:           5
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           1024
+      - Index:           6
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           66560
+      - Index:           7
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           131072
+      - Index:           8
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           0
+      - Index:           9
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           1
+      - Index:           10
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           65536
+  - Type:            EXPORT
+    Exports:
+      - Name:            memory
+        Kind:            MEMORY
+        Index:           0
+      - Name:            __wasm_call_ctors
+        Kind:            FUNCTION
+        Index:           0
+      - Name:            add
+        Kind:            FUNCTION
+        Index:           1
+      - Name:            __original_main
+        Kind:            FUNCTION
+        Index:           2
+      - Name:            main
+        Kind:            FUNCTION
+        Index:           3
+      - Name:            __main_void
+        Kind:            FUNCTION
+        Index:           2
+      - Name:            __indirect_function_table
+        Kind:            TABLE
+        Index:           0
+      - Name:            __dso_handle
+        Kind:            GLOBAL
+        Index:           1
+      - Name:            __data_end
+        Kind:            GLOBAL
+        Index:           2
+      - Name:            __stack_low
+        Kind:            GLOBAL
+        Index:           3
+      - Name:            __stack_high
+        Kind:            GLOBAL
+        Index:           4
+      - Name:            __global_base
+        Kind:            GLOBAL
+        Index:           5
+      - Name:            __heap_base
+        Kind:            GLOBAL
+        Index:           6
+      - Name:            __heap_end
+        Kind:            GLOBAL
+        Index:           7
+      - Name:            __memory_base
+        Kind:            GLOBAL
+        Index:           8
+      - Name:            __table_base
+        Kind:            GLOBAL
+        Index:           9
+      - Name:            __wasm_first_page_end
+        Kind:            GLOBAL
+        Index:           10
+  - Type:            CODE
+    Functions:
+      - Index:           0
+        Locals:          []
+        Body:            0B
+      - Index:           1
+        Locals:
+          - Type:            I32
+            Count:           1
+        Body:            23808080800041106B21022002200036020C20022001360208200228020C20022802086A0F0B
+      - Index:           2
+        Locals:
+          - Type:            I32
+            Count:           2
+        Body:            23808080800041106B210020002480808080002000410036020C2000410136020820004102360204200028020820002802041081808080002101200041106A24808080800020010F0B
+      - Index:           3
+        Locals:          []
+        Body:            1082808080000F0B
+  - Type:            CUSTOM
+    Name:            name
+    FunctionNames:
+      - Index:           0
+        Name:            __wasm_call_ctors
+      - Index:           1
+        Name:            add
+      - Index:           2
+        Name:            __original_main
+      - Index:           3
+        Name:            main
+    GlobalNames:
+      - Index:           0
+        Name:            __stack_pointer
+  - Type:            CUSTOM
+    Name:            producers
+    Tools:
+      - Name:            clang
+        Version:         '22.0.0git'
+  - Type:            CUSTOM
+    Name:            target_features
+    Features:
+      - Prefix:          USED
+        Name:            bulk-memory
+      - Prefix:          USED
+        Name:            bulk-memory-opt
+      - Prefix:          USED
+        Name:            call-indirect-overlong
+      - Prefix:          USED
+        Name:            multivalue
+      - Prefix:          USED
+        Name:            mutable-globals
+      - Prefix:          USED
+        Name:            nontrapping-fptoint
+      - Prefix:          USED
+        Name:            reference-types
+      - Prefix:          USED
+        Name:            sign-ext
+...

diff  --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test
new file mode 100644
index 0000000000000..fc185cd81a0ec
--- /dev/null
+++ b/lldb/test/Shell/Symtab/symtab-wasm.test
@@ -0,0 +1,7 @@
+# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
+# RUN: %lldb %t.wasm -o 'image dump symtab'
+
+# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors
+# CHECK: Code 0x0000000000000005 {{.*}} add
+# CHECK: Code 0x000000000000002f {{.*}} __original_main
+# CHECK: Code 0x000000000000007c {{.*}} main


        
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to