llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-lldb Author: Jonas Devlieghere (JDevlieghere) <details> <summary>Changes</summary> This PR adds support for parsing the WebAssembly symbol table. The symbol table is encoded in the "name" section and contains names and indexes into other sections. For now we only support parsing function (code) symbols. The result is that you can set breakpoints by symbol name, while previously breakpoints by name required debug info (DWARF). This is also necessary for Swift, which checks for the presence of `swift_release` as a heuristic to determine if there's a static Swift stdlib. --- Full diff: https://github.com/llvm/llvm-project/pull/153093.diff 7 Files Affected: - (modified) lldb/include/lldb/lldb-enumerations.h (+1) - (modified) lldb/source/Core/Section.cpp (+3) - (modified) lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp (+1) - (modified) lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp (+96-23) - (modified) lldb/source/Symbol/ObjectFile.cpp (+1) - (added) lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml (+210) - (added) lldb/test/Shell/Symtab/symtab-wasm.test (+7) ``````````diff diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index c63c1f03e58da..fec9fdef44df9 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -777,6 +777,7 @@ enum SectionType { eSectionTypeLLDBTypeSummaries, eSectionTypeLLDBFormatters, eSectionTypeSwiftModules, + eSectionTypeWasmName, }; FLAGS_ENUM(EmulateInstructionOptions){ diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index 27dcf987b0278..02d9d86fe5374 100644 --- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -153,6 +153,8 @@ const char *Section::GetTypeAsCString() const { return "lldb-formatters"; case eSectionTypeSwiftModules: return "swift-modules"; + case eSectionTypeWasmName: + return "wasm-name"; case eSectionTypeOther: return "regular"; } @@ 
-415,6 +417,7 @@ bool Section::ContainsOnlyDebugInfo() const { case eSectionTypeCompactUnwind: case eSectionTypeGoSymtab: case eSectionTypeAbsoluteAddress: + case eSectionTypeWasmName: case eSectionTypeOther: // Used for "__dof_cache" in mach-o or ".debug" for COFF which isn't debug // information that we parse at all. This was causing system files with no diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 13df6e2f26b53..d7cb60e3f0c38 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1156,6 +1156,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { case eSectionTypeDataObjCMessageRefs: case eSectionTypeDataObjCCFStrings: case eSectionTypeGoSymtab: + case eSectionTypeWasmName: return AddressClass::eData; case eSectionTypeDebug: diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index b1efd25949379..30fec5dd759e7 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -50,7 +50,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) { return version == llvm::wasm::WasmVersion; } -static std::optional<ConstString> +// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor. +static std::optional<std::string> GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { // A Wasm string is encoded as a vector of UTF-8 codes. 
// Vectors are encoded with their u32 length followed by the element @@ -72,8 +73,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { return std::nullopt; } - llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage)); - return ConstString(str); + return std::string(toStringRef(llvm::ArrayRef(str_storage))); } char ObjectFileWasm::ID; @@ -182,7 +182,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { // identifying the custom section, followed by an uninterpreted sequence // of bytes. lldb::offset_t prev_offset = c.tell(); - std::optional<ConstString> sect_name = GetWasmString(data, c); + std::optional<std::string> sect_name = GetWasmString(data, c); if (!sect_name) return false; @@ -191,7 +191,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { uint32_t section_length = payload_len - (c.tell() - prev_offset); m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length, - section_id, *sect_name}); + section_id, ConstString(*sect_name)}); *offset_ptr += (c.tell() + section_length); } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) { m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), @@ -248,12 +248,93 @@ bool ObjectFileWasm::ParseHeader() { return true; } -void ObjectFileWasm::ParseSymtab(Symtab &symtab) {} +static std::vector<AddressRange> ParseFunctions(SectionSP code_section_sp) { + DataExtractor code_section_data; + code_section_sp->GetSectionData(code_section_data); + lldb::offset_t offset = 0; + + const uint32_t function_count = code_section_data.GetULEB128(&offset); + + std::vector<AddressRange> functions; + functions.reserve(function_count); + + for (uint32_t i = 0; i < function_count; ++i) { + const uint32_t function_size = code_section_data.GetULEB128(&offset); + // llvm-objdump considers the ULEB with the function size to be part of the + // function. 
We can't do that here because that would break symbolic + // breakpoints, as that address is never executed. + functions.emplace_back(code_section_sp, offset, function_size); + offset += function_size; + } + + return functions; +} + +void ObjectFileWasm::ParseSymtab(Symtab &symtab) { + assert(m_sections_up && "sections must be parsed"); + Log *log = GetLog(LLDBLog::Object); + + // The name section contains names and indexes. First parse the functions from + // the code section so we can access them by their index. + SectionSP code_section_sp = + m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false); + if (!code_section_sp) + return; + std::vector<AddressRange> functions = ParseFunctions(code_section_sp); + + // Parse the name section. + SectionSP name_section_sp = + m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false); + if (!name_section_sp) + return; + + DataExtractor name_section_data; + name_section_sp->GetSectionData(name_section_data); + + llvm::DataExtractor data = name_section_data.GetAsLLVM(); + llvm::DataExtractor::Cursor c(0); + uint32_t sym_id = 0; + while (c && c.tell() < data.size()) { + const uint8_t type = data.getU8(c); + const uint32_t size = data.getULEB128(c); + switch (type) { + case llvm::wasm::WASM_NAMES_FUNCTION: { + const uint32_t count = data.getULEB128(c); + for (size_t i = 0; c && i < count; ++i) { + const uint32_t idx = data.getULEB128(c); + const std::optional<std::string> name = GetWasmString(data, c); + if (!name || idx >= functions.size()) + continue; + symtab.AddSymbol(Symbol( + sym_id++, Mangled(*name), lldb::eSymbolTypeCode, + /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false, + /*is_artificial=*/false, functions[idx], + /*size_is_valid=*/true, /*contains_linker_annotations=*/false, + /*flags=*/0)); + } + } break; + case llvm::wasm::WASM_NAMES_DATA_SEGMENT: + case llvm::wasm::WASM_NAMES_GLOBAL: + case llvm::wasm::WASM_NAMES_LOCAL: + default: + c.seek(c.tell() + size); + } + } + + if (!c) { + 
LLDB_LOG_ERROR(log, c.takeError(), + "Failed to parse the Wasm symbol table: {0}"); + return; + } + + symtab.Finalize(); +} static SectionType GetSectionTypeFromName(llvm::StringRef Name) { - if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) { + if (Name == "name") + return lldb::eSectionTypeWasmName; + if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) return ObjectFile::GetDWARFSectionTypeFromName(Name); - } return eSectionTypeOther; } @@ -263,13 +344,12 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { m_sections_up = std::make_unique<SectionList>(); - if (m_sect_infos.empty()) { + if (m_sect_infos.empty()) DecodeSections(); - } for (const section_info §_info : m_sect_infos) { - SectionType section_type = eSectionTypeOther; - ConstString section_name; + SectionType section_type = GetSectionTypeFromName(sect_info.name); + ConstString section_name = sect_info.name; offset_t file_offset = sect_info.offset & 0xffffffff; addr_t vm_addr = file_offset; size_t vm_size = sect_info.size; @@ -283,15 +363,8 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { // For this reason Section::GetFileAddress() must return zero for the // Code section. 
vm_addr = 0; - } else { - section_type = GetSectionTypeFromName(sect_info.name.GetStringRef()); - if (section_type == eSectionTypeOther) - continue; - section_name = sect_info.name; - if (!IsInMemory()) { - vm_size = 0; - vm_addr = 0; - } + } else if (section_type == eSectionTypeOther) { + continue; } SectionSP section_sp( @@ -397,9 +470,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { ReadImageData(sect_info.offset, kBufferSize); llvm::DataExtractor data = section_header_data.GetAsLLVM(); llvm::DataExtractor::Cursor c(0); - std::optional<ConstString> symbols_url = GetWasmString(data, c); + std::optional<std::string> symbols_url = GetWasmString(data, c); if (symbols_url) - return FileSpec(symbols_url->GetStringRef()); + return FileSpec(*symbols_url); } } return std::nullopt; diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 21daf7476b522..7efce2a035505 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -379,6 +379,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) { case eSectionTypeELFDynamicSymbols: case eSectionTypeELFRelocationEntries: case eSectionTypeELFDynamicLinkInfo: + case eSectionTypeWasmName: case eSectionTypeOther: return AddressClass::eUnknown; case eSectionTypeAbsoluteAddress: diff --git a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml new file mode 100644 index 0000000000000..165bb53662f40 --- /dev/null +++ b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml @@ -0,0 +1,210 @@ +--- !WASM +FileHeader: + Version: 0x1 +Sections: + - Type: TYPE + Signatures: + - Index: 0 + ParamTypes: [] + ReturnTypes: [] + - Index: 1 + ParamTypes: + - I32 + - I32 + ReturnTypes: + - I32 + - Index: 2 + ParamTypes: [] + ReturnTypes: + - I32 + - Type: FUNCTION + FunctionTypes: [ 0, 1, 2, 1 ] + - Type: TABLE + Tables: + - Index: 0 + ElemType: FUNCREF + Limits: + Flags: [ HAS_MAX ] + Minimum: 0x1 + Maximum: 0x1 + 
- Type: MEMORY + Memories: + - Minimum: 0x2 + - Type: GLOBAL + Globals: + - Index: 0 + Type: I32 + Mutable: true + InitExpr: + Opcode: I32_CONST + Value: 66560 + - Index: 1 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1024 + - Index: 2 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1024 + - Index: 3 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1024 + - Index: 4 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 66560 + - Index: 5 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1024 + - Index: 6 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 66560 + - Index: 7 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 131072 + - Index: 8 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 0 + - Index: 9 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1 + - Index: 10 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 65536 + - Type: EXPORT + Exports: + - Name: memory + Kind: MEMORY + Index: 0 + - Name: __wasm_call_ctors + Kind: FUNCTION + Index: 0 + - Name: add + Kind: FUNCTION + Index: 1 + - Name: __original_main + Kind: FUNCTION + Index: 2 + - Name: main + Kind: FUNCTION + Index: 3 + - Name: __main_void + Kind: FUNCTION + Index: 2 + - Name: __indirect_function_table + Kind: TABLE + Index: 0 + - Name: __dso_handle + Kind: GLOBAL + Index: 1 + - Name: __data_end + Kind: GLOBAL + Index: 2 + - Name: __stack_low + Kind: GLOBAL + Index: 3 + - Name: __stack_high + Kind: GLOBAL + Index: 4 + - Name: __global_base + Kind: GLOBAL + Index: 5 + - Name: __heap_base + Kind: GLOBAL + Index: 6 + - Name: __heap_end + Kind: GLOBAL + Index: 7 + - Name: __memory_base + Kind: GLOBAL + Index: 8 + - Name: __table_base + Kind: GLOBAL + Index: 9 + - Name: __wasm_first_page_end + Kind: GLOBAL + Index: 10 + - Type: CODE + Functions: + - Index: 0 + Locals: [] + Body: 0B + - Index: 1 + 
Locals: + - Type: I32 + Count: 1 + Body: 23808080800041106B21022002200036020C20022001360208200228020C20022802086A0F0B + - Index: 2 + Locals: + - Type: I32 + Count: 2 + Body: 23808080800041106B210020002480808080002000410036020C2000410136020820004102360204200028020820002802041081808080002101200041106A24808080800020010F0B + - Index: 3 + Locals: [] + Body: 1082808080000F0B + - Type: CUSTOM + Name: name + FunctionNames: + - Index: 0 + Name: __wasm_call_ctors + - Index: 1 + Name: add + - Index: 2 + Name: __original_main + - Index: 3 + Name: main + GlobalNames: + - Index: 0 + Name: __stack_pointer + - Type: CUSTOM + Name: producers + Tools: + - Name: clang + Version: '22.0.0git' + - Type: CUSTOM + Name: target_features + Features: + - Prefix: USED + Name: bulk-memory + - Prefix: USED + Name: bulk-memory-opt + - Prefix: USED + Name: call-indirect-overlong + - Prefix: USED + Name: multivalue + - Prefix: USED + Name: mutable-globals + - Prefix: USED + Name: nontrapping-fptoint + - Prefix: USED + Name: reference-types + - Prefix: USED + Name: sign-ext +... diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test new file mode 100644 index 0000000000000..fc185cd81a0ec --- /dev/null +++ b/lldb/test/Shell/Symtab/symtab-wasm.test @@ -0,0 +1,7 @@ +# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm +# RUN: %lldb %t.wasm -o 'image dump symtab' + +# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors +# CHECK: Code 0x0000000000000005 {{.*}} add +# CHECK: Code 0x000000000000002f {{.*}} __original_main +# CHECK: Code 0x000000000000007c {{.*}} main `````````` </details> https://github.com/llvm/llvm-project/pull/153093 _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits