https://github.com/Nerixyz updated https://github.com/llvm/llvm-project/pull/161678
>From 70c954ae5d95e13aa4b29dca928b7c02b59981fc Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Sun, 20 Jul 2025 13:25:56 +0200 Subject: [PATCH 01/14] [LLDB][NativePDB] Create functions with mangled name --- .../SymbolFile/NativePDB/PdbAstBuilder.cpp | 18 +++-- .../Plugins/SymbolFile/NativePDB/PdbUtil.h | 10 +++ .../NativePDB/SymbolFileNativePDB.cpp | 69 ++++++++++++++++++- .../NativePDB/SymbolFileNativePDB.h | 3 + .../NativePDB/break-by-function.cpp | 6 +- .../SymbolFile/NativePDB/break-by-line.cpp | 2 +- .../SymbolFile/NativePDB/disassembly.cpp | 2 +- .../SymbolFile/NativePDB/local-variables.cpp | 10 +-- .../NativePDB/stack_unwinding01.cpp | 12 ++-- 9 files changed, 110 insertions(+), 22 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index f01fba3c48ce9..5d43684d29e4f 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -38,16 +38,18 @@ struct CreateMethodDecl : public TypeVisitorCallbacks { TypeIndex func_type_index, clang::FunctionDecl *&function_decl, lldb::opaque_compiler_type_t parent_ty, - llvm::StringRef proc_name, CompilerType func_ct) + llvm::StringRef proc_name, ConstString mangled_name, + CompilerType func_ct) : m_index(m_index), m_clang(m_clang), func_type_index(func_type_index), function_decl(function_decl), parent_ty(parent_ty), - proc_name(proc_name), func_ct(func_ct) {} + proc_name(proc_name), mangled_name(mangled_name), func_ct(func_ct) {} PdbIndex &m_index; TypeSystemClang &m_clang; TypeIndex func_type_index; clang::FunctionDecl *&function_decl; lldb::opaque_compiler_type_t parent_ty; llvm::StringRef proc_name; + ConstString mangled_name; CompilerType func_ct; llvm::Error visitKnownMember(CVMemberRecord &cvr, @@ -88,7 +90,7 @@ struct CreateMethodDecl : public TypeVisitorCallbacks { MethodOptions::CompilerGenerated; function_decl = 
m_clang.AddMethodToCXXRecordType( parent_ty, proc_name, - /*asm_label=*/{}, func_ct, /*access=*/access_type, + mangled_name, func_ct, /*access=*/access_type, /*is_virtual=*/is_virtual, /*is_static=*/is_static, /*is_inline=*/false, /*is_explicit=*/false, /*is_attr_used=*/false, /*is_artificial=*/is_artificial); @@ -891,6 +893,11 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id, tag_record = CVTagRecord::create(index.tpi().getType(*eti)).asTag(); } } + + ConstString mangled_name; + if (auto mangled_name_opt = pdb->FindMangledFunctionName(func_id)) + mangled_name = ConstString(*mangled_name_opt); + if (!tag_record.FieldList.isSimple()) { CVType field_list_cvt = index.tpi().getType(tag_record.FieldList); FieldListRecord field_list; @@ -898,7 +905,8 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id, field_list_cvt, field_list)) llvm::consumeError(std::move(error)); CreateMethodDecl process(index, m_clang, func_ti, function_decl, - parent_opaque_ty, func_name, func_ct); + parent_opaque_ty, func_name, mangled_name, + func_ct); if (llvm::Error err = visitMemberRecordStream(field_list.Data, process)) llvm::consumeError(std::move(err)); } @@ -906,7 +914,7 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id, if (!function_decl) { function_decl = m_clang.AddMethodToCXXRecordType( parent_opaque_ty, func_name, - /*asm_label=*/{}, func_ct, + mangled_name, func_ct, /*access=*/lldb::AccessType::eAccessPublic, /*is_virtual=*/false, /*is_static=*/false, /*is_inline=*/false, /*is_explicit=*/false, diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h index 36e075b04f26f..f09fa7e24f775 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h @@ -99,6 +99,16 @@ struct SegmentOffset { SegmentOffset(uint16_t s, uint32_t o) : segment(s), offset(o) {} uint16_t segment = 0; uint32_t offset = 0; + + bool operator==(SegmentOffset 
rhs) const { + return segment == rhs.segment && offset == rhs.offset; + } + + bool operator<(SegmentOffset rhs) const { + if (segment == rhs.segment) + return offset < rhs.offset; + return segment < rhs.segment; + } }; struct SegmentOffsetLength { diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index dcea33dd9f854..b24a7025f7772 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -39,6 +39,7 @@ #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/PDB.h" @@ -500,7 +501,9 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id, return nullptr; PdbTypeSymId sig_id(proc.FunctionType, false); - Mangled mangled(proc.Name); + auto mangled_opt = + FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); + Mangled mangled(mangled_opt.value_or(proc.Name)); FunctionSP func_sp = std::make_shared<Function>( &comp_unit, toOpaqueUid(func_id), toOpaqueUid(sig_id), mangled, func_type.get(), func_addr, @@ -2441,3 +2444,67 @@ SymbolFileNativePDB::GetContextForType(TypeIndex ti) { } return ctx; } + +std::optional<llvm::StringRef> +SymbolFileNativePDB::FindMangledFunctionName(PdbCompilandSymId func_id) { + const CompilandIndexItem *cci = + m_index->compilands().GetCompiland(func_id.modi); + if (!cci) + return std::nullopt; + + CVSymbol sym_record = cci->m_debug_stream.readSymbolAtOffset(func_id.offset); + if (sym_record.kind() != S_LPROC32 && sym_record.kind() != S_GPROC32) + return std::nullopt; + + ProcSym proc(static_cast<SymbolRecordKind>(sym_record.kind())); + 
cantFail(SymbolDeserializer::deserializeAs<ProcSym>(sym_record, proc)); + return FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); +} + +/// Find the mangled name of a function at \a so. +/// +/// This is similar to the NearestSym function from Microsoft's PDB reference: +/// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581 +/// The main difference is that we search for the exact symbol. +/// +/// \param so[in] The address of the function given by its segment and code +/// offset. +/// \return The mangled function name if found. Otherwise an empty optional. +std::optional<llvm::StringRef> +SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so) { + // The address map is sorted by address, so we do binary search. + // Each element is an offset into the symbols for a public symbol. + auto lo = m_index->publics().getAddressMap().begin(); + auto hi = m_index->publics().getAddressMap().end(); + hi -= 1; + + while (lo < hi) { + auto tgt = lo + ((hi - lo + 1) / 2); + auto val = tgt->value(); + auto sym = m_index->symrecords().readRecord(val); + if (sym.kind() != S_PUB32) + return std::nullopt; // this is most likely corrupted debug info + + PublicSym32 psym = + llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym)); + SegmentOffset cur(psym.Segment, psym.Offset); + if (so < cur) { + tgt -= 1; + hi = tgt; + } else if (so == cur) + return psym.Name; + else + lo = tgt; + } + + // We might've found something, check if it's the symbol we're searching for + auto val = lo->value(); + auto sym = m_index->symrecords().readRecord(val); + if (sym.kind() != S_PUB32) + return std::nullopt; + PublicSym32 psym = + llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym)); + if (psym.Segment != so.segment || psym.Offset != so.offset) + return std::nullopt; + return psym.Name; +} diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h 
b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h index eda375d4cebe7..ab8f25dc40e86 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h @@ -140,6 +140,9 @@ class SymbolFileNativePDB : public SymbolFileCommon { std::optional<PdbCompilandSymId> FindSymbolScope(PdbCompilandSymId id); + std::optional<llvm::StringRef> FindMangledFunctionName(PdbCompilandSymId id); + std::optional<llvm::StringRef> FindMangledSymbol(SegmentOffset so); + void FindTypes(const lldb_private::TypeQuery &match, lldb_private::TypeResults &results) override; diff --git a/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp b/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp index a580d574a9ca3..d4499373bb860 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp @@ -50,9 +50,9 @@ int main(int argc, char **argv) { // CHECK: 1: name = 'main', locations = 1 // CHECK: 1.1: where = break-by-function.cpp.tmp.exe`main + {{[0-9]+}} // CHECK: 2: name = 'OvlGlobalFn', locations = 3 -// CHECK: 2.1: where = break-by-function.cpp.tmp.exe`OvlGlobalFn + {{[0-9]+}} -// CHECK: 2.2: where = break-by-function.cpp.tmp.exe`OvlGlobalFn -// CHECK: 2.3: where = break-by-function.cpp.tmp.exe`OvlGlobalFn + {{[0-9]+}} +// CHECK: 2.1: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int) + {{[0-9]+}} +// CHECK: 2.2: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int, int) +// CHECK: 2.3: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int, int, int) + {{[0-9]+}} // CHECK: 3: name = 'StaticFn', locations = 1 // CHECK: 3.1: where = break-by-function.cpp.tmp.exe`StaticFn + {{[0-9]+}} // CHECK: 4: name = 'DoesntExist', locations = 0 (pending) diff --git a/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp b/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp index 90ac633b01632..3d7de3275ed65 100644 
--- a/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp @@ -24,4 +24,4 @@ int main(int argc, char **argv) { // CHECK: (lldb) target create "{{.*}}break-by-line.cpp.tmp.exe" // CHECK: Current executable set to '{{.*}}break-by-line.cpp.tmp.exe' // CHECK: (lldb) break set -f break-by-line.cpp -l 15 -// CHECK: Breakpoint 1: where = break-by-line.cpp.tmp.exe`NS::NamespaceFn + {{[0-9]+}} at break-by-line.cpp:15 +// CHECK: Breakpoint 1: where = break-by-line.cpp.tmp.exe`int NS::NamespaceFn(int) + {{[0-9]+}} at break-by-line.cpp:15 diff --git a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp index db3b85fa7e59f..3603db80ba8a7 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp @@ -27,7 +27,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+12>: mov qword ptr [rsp + 0x28], rdx // CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+17>: mov dword ptr [rsp + 0x24], ecx // CHECK: ** 15 foo(); -// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+21>: call {{.*}} ; foo at disassembly.cpp:12 +// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+21>: call {{.*}} ; int foo(void) at disassembly.cpp:12 // CHECK: ** 16 return 0; // CHECK-NEXT: 17 } // CHECK-NEXT: 18 diff --git a/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp b/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp index 44a8dc14c6158..f44a5b9dd56e2 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp @@ -55,7 +55,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: (lldb) step // CHECK-NEXT: Process {{.*}} stopped // CHECK-NEXT: * thread #1, stop reason = step in -// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} +// CHECK-NEXT: frame #0: {{.*}} 
local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} // CHECK-NEXT: 6 // CHECK-NEXT: 7 // CHECK-NEXT: 8 int Function(int Param1, char Param2) { @@ -71,7 +71,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: (lldb) step // CHECK-NEXT: Process {{.*}} stopped // CHECK-NEXT: * thread #1, stop reason = step in -// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} +// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} // CHECK-NEXT: 7 // CHECK-NEXT: 8 int Function(int Param1, char Param2) { // CHECK-NEXT: 9 unsigned Local1 = Param1 + 1; @@ -89,7 +89,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: (lldb) step // CHECK-NEXT: Process {{.*}} stopped // CHECK-NEXT: * thread #1, stop reason = step in -// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} +// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} // CHECK-NEXT: 8 int Function(int Param1, char Param2) { // CHECK-NEXT: 9 unsigned Local1 = Param1 + 1; // CHECK-NEXT: 10 char Local2 = Param2 + 1; @@ -109,7 +109,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: (lldb) step // CHECK-NEXT: Process {{.*}} stopped // CHECK-NEXT: * thread #1, stop reason = step in -// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} +// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} // CHECK-NEXT: 9 unsigned Local1 = Param1 + 1; // CHECK-NEXT: 10 char Local2 = Param2 + 1; // CHECK-NEXT: 11 ++Local1; @@ -129,7 +129,7 @@ int main(int argc, char **argv) { // CHECK-NEXT: (lldb) step // CHECK-NEXT: Process {{.*}} stopped // CHECK-NEXT: * thread #1, stop reason = step in -// 
CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} +// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}} // CHECK-NEXT: 10 char Local2 = Param2 + 1; // CHECK-NEXT: 11 ++Local1; // CHECK-NEXT: 12 ++Local2; diff --git a/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp b/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp index 596a826f4a11b..87eeebe7aa1b6 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp @@ -24,19 +24,19 @@ int main(int argc, char **argv) { // CHECK: (lldb) thread backtrace // CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1 -// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12 +// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12 // CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20 // CHECK: (lldb) thread backtrace // CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1 -// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12 -// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12 +// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12 +// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12 // CHECK-NEXT: frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20 // CHECK: (lldb) thread 
backtrace // CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1 -// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=4, b=2) at stack_unwinding01.cpp:12 -// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12 -// CHECK-NEXT: frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12 +// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=4, b=2) at stack_unwinding01.cpp:12 +// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12 +// CHECK-NEXT: frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12 // CHECK-NEXT: frame #3: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20 >From 5cb50cc6244722bfd37331e3ad8cdb5ef6fe0f78 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Fri, 8 Aug 2025 21:06:31 +0200 Subject: [PATCH 02/14] fix: use value_or --- lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index 5d43684d29e4f..d6ea2114dcbc5 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -894,9 +894,8 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id, } } - ConstString mangled_name; - if (auto mangled_name_opt = pdb->FindMangledFunctionName(func_id)) - mangled_name = ConstString(*mangled_name_opt); + ConstString mangled_name( + pdb->FindMangledFunctionName(func_id).value_or(llvm::StringRef())); if (!tag_record.FieldList.isSimple()) 
{ CVType field_list_cvt = index.tpi().getType(tag_record.FieldList); >From d242da2ce8cf7593ce1c211b779c516709b29152 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Fri, 8 Aug 2025 21:18:01 +0200 Subject: [PATCH 03/14] fix: formatting --- lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index d6ea2114dcbc5..66a92d6f994d5 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -89,8 +89,7 @@ struct CreateMethodDecl : public TypeVisitorCallbacks { bool is_artificial = (options & MethodOptions::CompilerGenerated) == MethodOptions::CompilerGenerated; function_decl = m_clang.AddMethodToCXXRecordType( - parent_ty, proc_name, - mangled_name, func_ct, /*access=*/access_type, + parent_ty, proc_name, mangled_name, func_ct, /*access=*/access_type, /*is_virtual=*/is_virtual, /*is_static=*/is_static, /*is_inline=*/false, /*is_explicit=*/false, /*is_attr_used=*/false, /*is_artificial=*/is_artificial); @@ -912,8 +911,7 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id, if (!function_decl) { function_decl = m_clang.AddMethodToCXXRecordType( - parent_opaque_ty, func_name, - mangled_name, func_ct, + parent_opaque_ty, func_name, mangled_name, func_ct, /*access=*/lldb::AccessType::eAccessPublic, /*is_virtual=*/false, /*is_static=*/false, /*is_inline=*/false, /*is_explicit=*/false, >From 7642fdd483bc4505d691c642b6ede1173d72448e Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Thu, 28 Aug 2025 17:43:38 +0200 Subject: [PATCH 04/14] fix: `find-functions` test --- lldb/test/Shell/SymbolFile/NativePDB/find-functions.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lldb/test/Shell/SymbolFile/NativePDB/find-functions.cpp 
b/lldb/test/Shell/SymbolFile/NativePDB/find-functions.cpp index 3ef7a4c94c287..6204cbd34a588 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/find-functions.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/find-functions.cpp @@ -148,11 +148,12 @@ int main(int argc, char **argv) { // FIND-OVERLOAD-BASE-DAG: FuncType: id = {{.*}}, compiler_type = "int (void)" // FIND-OVERLOAD-BASE-DAG: FuncType: id = {{.*}}, compiler_type = "int (char)" // FIND-OVERLOAD-BASE-DAG: FuncType: id = {{.*}}, compiler_type = "int (char, int, ...)" -// FIND-OVERLOAD-BASE-DAG: Function: id = {{.*}}, name = "Class::overloaded_method" +// FIND-OVERLOAD-BASE-DAG: Function: id = {{.*}}, name = "int Class::overloaded_method(bool)" // FIND-OVERLOAD-BASE-DAG: FuncType: id = {{.*}}, compiler_type = "_Bool (void)" // FIND-OVERLOAD-BASE-DAG: FuncType: id = {{.*}}, compiler_type = "_Bool (int)" // FIND-OVERLOAD-BASE-DAG: FuncType: id = {{.*}}, compiler_type = "int (_Bool)" -// FIND-OVERLOAD-BASE-DAG: Function: id = {{.*}}, name = "overloaded_method" +// FIND-OVERLOAD-BASE-DAG: Function: id = {{.*}}, name = "char overloaded_method(void)" +// FIND-OVERLOAD-BASE-DAG: Function: id = {{.*}}, name = "char overloaded_method(int)" // FIND-OVERLOAD-BASE-DAG: FuncType: id = {{.*}}, compiler_type = "char (void)" // FIND-OVERLOAD-BASE-DAG: FuncType: id = {{.*}}, compiler_type = "char (int)" @@ -160,6 +161,6 @@ int main(int argc, char **argv) { // FIND-OVERLOAD-METHOD-DAG: Function: id = {{.*}}, name = "{{.*}}Struct::overloaded_method{{.*}}" // FIND-OVERLOAD-METHOD-DAG: FuncType: id = {{.*}}, compiler_type = "int (void)" // FIND-OVERLOAD-METHOD-DAG: FuncType: id = {{.*}}, compiler_type = "int (char)" -// FIND-OVERLOAD-METHOD-DAG: Function: id = {{.*}}, name = "Class::overloaded_method" +// FIND-OVERLOAD-METHOD-DAG: Function: id = {{.*}}, name = "bool Class::overloaded_method(void)" // FIND-OVERLOAD-METHOD-DAG: FuncType: id = {{.*}}, compiler_type = "_Bool (void)" // FIND-OVERLOAD-METHOD-DAG: FuncType: id = {{.*}}, 
compiler_type = "_Bool (int)" >From 5847701f94080c953d9621b07fc29ad7903b9212 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Wed, 10 Sep 2025 17:57:22 +0200 Subject: [PATCH 05/14] fix: use LLVM's symbol lookup --- .../NativePDB/SymbolFileNativePDB.cpp | 47 ++----------------- 1 file changed, 4 insertions(+), 43 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index d30bfa1349ced..1cf7625c4029e 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -2634,52 +2634,13 @@ SymbolFileNativePDB::FindMangledFunctionName(PdbCompilandSymId func_id) { return FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); } -/// Find the mangled name of a function at \a so. -/// -/// This is similar to the NearestSym function from Microsoft's PDB reference: -/// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581 -/// The main difference is that we search for the exact symbol. -/// -/// \param so[in] The address of the function given by its segment and code -/// offset. -/// \return The mangled function name if found. Otherwise an empty optional. std::optional<llvm::StringRef> SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so) { - // The address map is sorted by address, so we do binary search. - // Each element is an offset into the symbols for a public symbol. 
- auto lo = m_index->publics().getAddressMap().begin(); - auto hi = m_index->publics().getAddressMap().end(); - hi -= 1; - - while (lo < hi) { - auto tgt = lo + ((hi - lo + 1) / 2); - auto val = tgt->value(); - auto sym = m_index->symrecords().readRecord(val); - if (sym.kind() != S_PUB32) - return std::nullopt; // this is most likely corrupted debug info - - PublicSym32 psym = - llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym)); - SegmentOffset cur(psym.Segment, psym.Offset); - if (so < cur) { - tgt -= 1; - hi = tgt; - } else if (so == cur) - return psym.Name; - else - lo = tgt; - } - - // We might've found something, check if it's the symbol we're searching for - auto val = lo->value(); - auto sym = m_index->symrecords().readRecord(val); - if (sym.kind() != S_PUB32) - return std::nullopt; - PublicSym32 psym = - llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym)); - if (psym.Segment != so.segment || psym.Offset != so.offset) + auto symbol = m_index->publics().findByAddress(m_index->symrecords(), + so.segment, so.offset); + if (!symbol) return std::nullopt; - return psym.Name; + return symbol->first.Name; } void SymbolFileNativePDB::CacheUdtDeclarations() { >From 6c8000858e22caf12c99e3c40e4fe72304d99ac2 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Wed, 10 Sep 2025 17:58:00 +0200 Subject: [PATCH 06/14] fix: check for mangled or demangled names in `variables.test` --- lldb/test/Shell/SymbolFile/PDB/variables.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/test/Shell/SymbolFile/PDB/variables.test b/lldb/test/Shell/SymbolFile/PDB/variables.test index 970d714c29c3b..cb761de38241c 100644 --- a/lldb/test/Shell/SymbolFile/PDB/variables.test +++ b/lldb/test/Shell/SymbolFile/PDB/variables.test @@ -42,7 +42,7 @@ GLOBALS-DAG: Variable{{.*}}, name = "g_Const" GLOBALS-SAME: scope = ??? 
(2) GLOBALS: Function -FUNC-F: Function{{.*}}, {{mangled = \?f@@YAHHH@Z|demangled = f}} +FUNC-F: Function{{.*}}, mangled = ?f@@YAHHH@Z FUNC-F-NEXT: Block FUNC-F-NEXT: Variable{{.*}}, name = "var_arg1" FUNC-F-SAME: scope = parameter @@ -64,14 +64,14 @@ FUNC-MAIN-SAME: scope = local FUNC-MAIN-NEXT: Variable{{.*}}, name = "a" FUNC-MAIN-SAME: scope = local -FUNC-CONSTRUCTOR: Function{{.*}}, {{(de)?}}mangled = {{.*}}Class::Class{{.*}} +FUNC-CONSTRUCTOR: Function{{.*}}, {{mangled = \?\?0Class@@QEAA@H@Z|demangled = .*Class::Class}} FUNC-CONSTRUCTOR-NEXT: Block FUNC-CONSTRUCTOR-NEXT: Variable{{.*}}, name = "this" FUNC-CONSTRUCTOR-SAME: scope = parameter FUNC-CONSTRUCTOR-NEXT: Variable{{.*}}, name = "a" FUNC-CONSTRUCTOR-SAME: scope = parameter -FUNC-MEMBER: Function{{.*}}, {{(de)?}}mangled = {{.*}}{{(Class::)?}}Func{{.*}} +FUNC-MEMBER: Function{{.*}}, {{mangled = \?Func@Class@@QEAAXXZ|demangled = .*Class::Func}} FUNC-MEMBER-NEXT: Block FUNC-MEMBER-NEXT: Variable{{.*}}, name = "this" FUNC-MEMBER-SAME: scope = parameter >From 15482a4fceddbcde36adf149b151763dc6b4a8a9 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Wed, 24 Sep 2025 13:33:28 +0200 Subject: [PATCH 07/14] fix: check for `main` function name --- lldb/test/Shell/SymbolFile/PDB/function-nested-block.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test index 9057d01c25840..4a2355bf23c9a 100644 --- a/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test +++ b/lldb/test/Shell/SymbolFile/PDB/function-nested-block.test @@ -4,7 +4,7 @@ RUN: lldb-test symbols -find=function -file FunctionNestedBlockTest.cpp -line 4 RUN: lldb-test symbols -find=block -file FunctionNestedBlockTest.cpp -line 4 %t.exe | FileCheck --check-prefix=CHECK-BLOCK %s CHECK-FUNCTION: Found 1 functions: -CHECK-FUNCTION: name = "{{.*}}", mangled = "{{_?}}main" +CHECK-FUNCTION: name = "main" 
CHECK-BLOCK: Found 1 blocks: CHECK-BLOCK: Blocks: id = {{.*}}, range = {{.*}} >From f65c12e31f64a52f96a3617f7b9355e184c339e7 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Wed, 24 Sep 2025 14:02:39 +0200 Subject: [PATCH 08/14] fix: remove unused operators --- lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h index f09fa7e24f775..36e075b04f26f 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h @@ -99,16 +99,6 @@ struct SegmentOffset { SegmentOffset(uint16_t s, uint32_t o) : segment(s), offset(o) {} uint16_t segment = 0; uint32_t offset = 0; - - bool operator==(SegmentOffset rhs) const { - return segment == rhs.segment && offset == rhs.offset; - } - - bool operator<(SegmentOffset rhs) const { - if (segment == rhs.segment) - return offset < rhs.offset; - return segment < rhs.segment; - } }; struct SegmentOffsetLength { >From f2a0657363492150defcc9c5b5f8443522e28b23 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Wed, 24 Sep 2025 14:02:52 +0200 Subject: [PATCH 09/14] fix: resolve auto --- .../source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index 6bd2556b45ab8..fcb2e8d0dc997 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -501,7 +501,7 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id, return nullptr; PdbTypeSymId sig_id(proc.FunctionType, false); - auto mangled_opt = + std::optional<llvm::StringRef> mangled_opt = FindMangledSymbol(SegmentOffset(proc.Segment, 
proc.CodeOffset)); Mangled mangled(mangled_opt.value_or(proc.Name)); FunctionSP func_sp = std::make_shared<Function>( >From 0fc03c2df68b1173c220c851de26caf4fcb8c3d5 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Wed, 24 Sep 2025 14:03:38 +0200 Subject: [PATCH 10/14] fix: make `FindMangledSymbol` private --- lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h index dc262478a0eca..85efb51dcb033 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h @@ -141,7 +141,6 @@ class SymbolFileNativePDB : public SymbolFileCommon { std::optional<PdbCompilandSymId> FindSymbolScope(PdbCompilandSymId id); std::optional<llvm::StringRef> FindMangledFunctionName(PdbCompilandSymId id); - std::optional<llvm::StringRef> FindMangledSymbol(SegmentOffset so); void FindTypes(const lldb_private::TypeQuery &match, lldb_private::TypeResults &results) override; @@ -272,6 +271,8 @@ class SymbolFileNativePDB : public SymbolFileCommon { void CacheUdtDeclarations(); llvm::Expected<Declaration> ResolveUdtDeclaration(PdbTypeSymId type_id); + std::optional<llvm::StringRef> FindMangledSymbol(SegmentOffset so); + llvm::BumpPtrAllocator m_allocator; lldb::addr_t m_obj_load_address = 0; >From c2cad1e0894715f1b092d184501bdc04970d1b72 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Wed, 24 Sep 2025 14:03:55 +0200 Subject: [PATCH 11/14] doc: add documentation for `FindMangledFunctionName` --- .../source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h index 85efb51dcb033..9249d190abfd7 100644 --- 
a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h @@ -140,6 +140,10 @@ class SymbolFileNativePDB : public SymbolFileCommon { std::optional<PdbCompilandSymId> FindSymbolScope(PdbCompilandSymId id); + /// Find the mangled name for a function + /// + /// \param id A symbol ID of a S_LPROC32/S_GPROC32 record + /// \returns The mangled name of the function (if available) std::optional<llvm::StringRef> FindMangledFunctionName(PdbCompilandSymId id); void FindTypes(const lldb_private::TypeQuery &match, >From 5ba370241cb93faf8073ecb48457ce4961497872 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Thu, 2 Oct 2025 16:29:31 +0200 Subject: [PATCH 12/14] fix: strip leading underscore for __cdecl functions on x86 --- .../Plugins/SymbolFile/NativePDB/PdbUtil.cpp | 18 +++++++ .../Plugins/SymbolFile/NativePDB/PdbUtil.h | 12 +++++ .../NativePDB/SymbolFileNativePDB.cpp | 17 ++++++- .../NativePDB/c-calling-conventions.cpp | 51 +++++++++++++++++++ .../SymbolFile/NativePDB/CMakeLists.txt | 1 + .../SymbolFile/NativePDB/PdbUtilTests.cpp | 47 +++++++++++++++++ 6 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 lldb/test/Shell/SymbolFile/NativePDB/c-calling-conventions.cpp create mode 100644 lldb/unittests/SymbolFile/NativePDB/PdbUtilTests.cpp diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp index 888bd89a72625..6c6a5598689e5 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp @@ -1118,3 +1118,21 @@ size_t lldb_private::npdb::GetSizeOfType(PdbTypeSymId id, } return 0; } + +llvm::StringRef lldb_private::npdb::StripCDeclPrefix(llvm::StringRef mangled) { + // See + // https://learn.microsoft.com/en-us/cpp/build/reference/decorated-names#FormatC + if (!mangled.starts_with('_')) + return mangled; + + // make sure this isn't __stdcall 
(`_{name}@{sizeof(params)}`) or __vectorcall + // (`{name}@@{sizeof(params)}`). + size_t last_at_pos = mangled.find_last_of('@'); + if (last_at_pos != llvm::StringRef::npos && + last_at_pos < mangled.size() - 1 && + llvm::all_of(mangled.slice(last_at_pos + 1, mangled.size()), + llvm::isDigit)) + return mangled; + + return mangled.drop_front(); +} diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h index 36e075b04f26f..e849dde124297 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h @@ -161,6 +161,18 @@ PdbTypeSymId GetBestPossibleDecl(PdbTypeSymId id, llvm::pdb::TpiStream &tpi); size_t GetSizeOfType(PdbTypeSymId id, llvm::pdb::TpiStream &tpi); +/// Strips the leading underscore of mangled __cdecl functions. +/// +/// If the name comes from another calling convention, it is returned as-is. +/// +/// \pre \c mangled must not be from a 64-bit environment as __cdecl names +/// aren't mangled there. +/// +/// \param[in] mangled A mangled symbol name +/// \returns The stripped name if this name is a mangled __cdecl one. Otherwise, +/// the input is returned. 
+llvm::StringRef StripCDeclPrefix(llvm::StringRef mangled); + } // namespace npdb } // namespace lldb_private diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index fcb2e8d0dc997..73ae1ca631f40 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -2677,6 +2677,7 @@ SymbolFileNativePDB::FindMangledFunctionName(PdbCompilandSymId func_id) { ProcSym proc(static_cast<SymbolRecordKind>(sym_record.kind())); cantFail(SymbolDeserializer::deserializeAs<ProcSym>(sym_record, proc)); + return FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); } @@ -2686,7 +2687,21 @@ SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so) { so.segment, so.offset); if (!symbol) return std::nullopt; - return symbol->first.Name; + + llvm::StringRef name = symbol->first.Name; + // "In non-64 bit environments" (on x86 in practice), __cdecl functions get + // prefixed with an underscore. For compilers using LLVM, this happens in LLVM + // (as opposed to the compiler frontend). Because of this, DWARF doesn't + // contain the "full" mangled name in DW_AT_linkage_name for these functions. + // We strip the mangling here for compatibility with DWARF. 
See + // llvm.org/pr161676 and + // https://learn.microsoft.com/en-us/cpp/build/reference/decorated-names#FormatC + if ((symbol->first.Flags & PublicSymFlags::Function) != + PublicSymFlags::None && + m_index->dbi().getMachineType() == PDB_Machine::x86) + name = StripCDeclPrefix(name); + + return name; } void SymbolFileNativePDB::CacheUdtDeclarations() { diff --git a/lldb/test/Shell/SymbolFile/NativePDB/c-calling-conventions.cpp b/lldb/test/Shell/SymbolFile/NativePDB/c-calling-conventions.cpp new file mode 100644 index 0000000000000..569eaace1baef --- /dev/null +++ b/lldb/test/Shell/SymbolFile/NativePDB/c-calling-conventions.cpp @@ -0,0 +1,51 @@ +// clang-format off +// REQUIRES: lld, x86 + +// RUN: %build --compiler=clang-cl --arch=32 --nodefaultlib --output=%t-32.exe %s +// RUN: lldb-test symbols %t-32.exe | FileCheck --check-prefixes CHECK-32,CHECK-BOTH %s +// RUN: %build --compiler=clang-cl --arch=64 --nodefaultlib --output=%t-64.exe %s +// RUN: lldb-test symbols %t-64.exe | FileCheck --check-prefixes CHECK-64,CHECK-BOTH %s + +extern "C" { +int FuncCCall() { return 0; } +int __stdcall FuncStdCall() { return 0; } +int __fastcall FuncFastCall() { return 0; } +int __vectorcall FuncVectorCall() { return 0; } + +int __cdecl _underscoreCdecl() { return 0; } +int __stdcall _underscoreStdcall() { return 0; } +int __fastcall _underscoreFastcall() { return 0; } +int __vectorcall _underscoreVectorcall() { return 0; } +} + +int main() { + FuncCCall(); + FuncStdCall(); + FuncFastCall(); + FuncVectorCall(); + _underscoreCdecl(); + _underscoreStdcall(); + _underscoreFastcall(); + _underscoreVectorcall(); + return 0; +} + +// CHECK-BOTH-DAG: Function{{.*}}, demangled = FuncCCall, +// CHECK-BOTH-DAG: Function{{.*}}, demangled = FuncVectorCall@@0, +// CHECK-BOTH-DAG: Function{{.*}}, demangled = _underscoreCdecl, +// CHECK-BOTH-DAG: Function{{.*}}, demangled = _underscoreVectorcall@@0, +// CHECK-BOTH-DAG: Function{{.*}}, demangled = main, + +// __stdcall and __fastcall aren't 
available on 64 bit + +// CHECK-32-DAG: Function{{.*}}, demangled = _FuncStdCall@0, +// CHECK-64-DAG: Function{{.*}}, demangled = FuncStdCall, + +// CHECK-32-DAG: Function{{.*}}, demangled = @FuncFastCall@0, +// CHECK-64-DAG: Function{{.*}}, demangled = FuncFastCall, + +// CHECK-32-DAG: Function{{.*}}, demangled = __underscoreStdcall@0, +// CHECK-64-DAG: Function{{.*}}, demangled = _underscoreStdcall, + +// CHECK-32-DAG: Function{{.*}}, demangled = @_underscoreFastcall@0, +// CHECK-64-DAG: Function{{.*}}, demangled = _underscoreFastcall, diff --git a/lldb/unittests/SymbolFile/NativePDB/CMakeLists.txt b/lldb/unittests/SymbolFile/NativePDB/CMakeLists.txt index bfd74dd5050b4..d0012568516c4 100644 --- a/lldb/unittests/SymbolFile/NativePDB/CMakeLists.txt +++ b/lldb/unittests/SymbolFile/NativePDB/CMakeLists.txt @@ -1,5 +1,6 @@ add_lldb_unittest(SymbolFileNativePDBTests PdbFPOProgramToDWARFExpressionTests.cpp + PdbUtilTests.cpp UdtRecordCompleterTests.cpp LINK_COMPONENTS diff --git a/lldb/unittests/SymbolFile/NativePDB/PdbUtilTests.cpp b/lldb/unittests/SymbolFile/NativePDB/PdbUtilTests.cpp new file mode 100644 index 0000000000000..31ec9e88aa567 --- /dev/null +++ b/lldb/unittests/SymbolFile/NativePDB/PdbUtilTests.cpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Plugins/SymbolFile/NativePDB/PdbUtil.h" +#include "gtest/gtest.h" + +using namespace lldb_private::npdb; + +TEST(PdbUtil, StripCDeclPrefix) { + ASSERT_EQ(StripCDeclPrefix("main"), "main"); + + // __cdecl + ASSERT_EQ(StripCDeclPrefix("_main"), "main"); + ASSERT_EQ(StripCDeclPrefix("__main"), "_main"); + ASSERT_EQ(StripCDeclPrefix("_main@"), "main@"); + ASSERT_EQ(StripCDeclPrefix("_main@foo"), "main@foo"); + ASSERT_EQ(StripCDeclPrefix("_main@4@foo"), "main@4@foo"); + + // __stdcall + ASSERT_EQ(StripCDeclPrefix("_main@4"), "_main@4"); + ASSERT_EQ(StripCDeclPrefix("_main@foo@4"), "_main@foo@4"); + ASSERT_EQ(StripCDeclPrefix("_main@4@5"), "_main@4@5"); + + // __fastcall + ASSERT_EQ(StripCDeclPrefix("@main@4"), "@main@4"); + + // __vectorcall + ASSERT_EQ(StripCDeclPrefix("main@@4"), "main@@4"); + ASSERT_EQ(StripCDeclPrefix("_main@@4"), "_main@@4"); + + // MS C++ mangling + ASSERT_EQ(StripCDeclPrefix("?a@@YAHD@Z"), "?a@@YAHD@Z"); + // Itanium mangling (e.g. 
on MinGW) + ASSERT_EQ(StripCDeclPrefix("__Z7recursei"), "_Z7recursei"); + + ASSERT_EQ(StripCDeclPrefix("_"), ""); + ASSERT_EQ(StripCDeclPrefix("_@"), "@"); + ASSERT_EQ(StripCDeclPrefix(""), ""); + ASSERT_EQ(StripCDeclPrefix("_@4"), "_@4"); + ASSERT_EQ(StripCDeclPrefix("@4"), "@4"); + ASSERT_EQ(StripCDeclPrefix("@"), "@"); +} >From 20356cc1e06afb092f10d96728bcc00bd3feea70 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Fri, 3 Oct 2025 12:24:52 +0200 Subject: [PATCH 13/14] fix: check function type to get calling convention --- .../Plugins/SymbolFile/NativePDB/PdbUtil.cpp | 18 ------- .../Plugins/SymbolFile/NativePDB/PdbUtil.h | 12 ----- .../NativePDB/SymbolFileNativePDB.cpp | 50 ++++++++++++++++--- .../NativePDB/SymbolFileNativePDB.h | 3 ++ .../SymbolFile/NativePDB/CMakeLists.txt | 1 - .../SymbolFile/NativePDB/PdbUtilTests.cpp | 47 ----------------- 6 files changed, 46 insertions(+), 85 deletions(-) delete mode 100644 lldb/unittests/SymbolFile/NativePDB/PdbUtilTests.cpp diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp index 6c6a5598689e5..888bd89a72625 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp @@ -1118,21 +1118,3 @@ size_t lldb_private::npdb::GetSizeOfType(PdbTypeSymId id, } return 0; } - -llvm::StringRef lldb_private::npdb::StripCDeclPrefix(llvm::StringRef mangled) { - // See - // https://learn.microsoft.com/en-us/cpp/build/reference/decorated-names#FormatC - if (!mangled.starts_with('_')) - return mangled; - - // make sure this isn't __stdcall (`_{name}@{sizeof(params)}`) or __vectorcall - // (`{name}@@{sizeof(params)}`). 
- size_t last_at_pos = mangled.find_last_of('@'); - if (last_at_pos != llvm::StringRef::npos && - last_at_pos < mangled.size() - 1 && - llvm::all_of(mangled.slice(last_at_pos + 1, mangled.size()), - llvm::isDigit)) - return mangled; - - return mangled.drop_front(); -} diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h index e849dde124297..36e075b04f26f 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h @@ -161,18 +161,6 @@ PdbTypeSymId GetBestPossibleDecl(PdbTypeSymId id, llvm::pdb::TpiStream &tpi); size_t GetSizeOfType(PdbTypeSymId id, llvm::pdb::TpiStream &tpi); -/// Strips the leading underscore of mangled __cdecl functions. -/// -/// If the name comes from another calling convention, it is returned as-is. -/// -/// \pre \c mangled must not be from a 64-bit environment as __cdecl names -/// aren't mangled there. -/// -/// \param[in] mangled A mangled symbol name -/// \returns The stripped name if this name is a mangled __cdecl one. Otherwise, -/// the input is returned. 
-llvm::StringRef StripCDeclPrefix(llvm::StringRef mangled); - } // namespace npdb } // namespace lldb_private diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index 73ae1ca631f40..ac261d3dfc017 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -501,9 +501,13 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id, return nullptr; PdbTypeSymId sig_id(proc.FunctionType, false); + std::optional<llvm::StringRef> mangled_opt = FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); + if (mangled_opt) + mangled_opt = StripMangledFunctionName(*mangled_opt, proc.FunctionType); Mangled mangled(mangled_opt.value_or(proc.Name)); + FunctionSP func_sp = std::make_shared<Function>( &comp_unit, toOpaqueUid(func_id), toOpaqueUid(sig_id), mangled, func_type.get(), func_addr, @@ -2678,7 +2682,11 @@ SymbolFileNativePDB::FindMangledFunctionName(PdbCompilandSymId func_id) { ProcSym proc(static_cast<SymbolRecordKind>(sym_record.kind())); cantFail(SymbolDeserializer::deserializeAs<ProcSym>(sym_record, proc)); - return FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); + std::optional<llvm::StringRef> mangled = + FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); + if (mangled) + mangled = StripMangledFunctionName(*mangled, proc.FunctionType); + return mangled; } std::optional<llvm::StringRef> @@ -2688,7 +2696,12 @@ SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so) { if (!symbol) return std::nullopt; - llvm::StringRef name = symbol->first.Name; + return symbol->first.Name; +} + +llvm::StringRef +SymbolFileNativePDB::StripMangledFunctionName(const llvm::StringRef mangled, + PdbTypeSymId func_ty) { // "In non-64 bit environments" (on x86 in practice), __cdecl functions get // prefixed with an underscore. 
For compilers using LLVM, this happens in LLVM // (as opposed to the compiler frontend). Because of this, DWARF doesn't @@ -2696,12 +2709,35 @@ SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so) { // We strip the mangling here for compatibility with DWARF. See // llvm.org/pr161676 and // https://learn.microsoft.com/en-us/cpp/build/reference/decorated-names#FormatC - if ((symbol->first.Flags & PublicSymFlags::Function) != - PublicSymFlags::None && - m_index->dbi().getMachineType() == PDB_Machine::x86) - name = StripCDeclPrefix(name); - return name; + if (!mangled.starts_with('_') || + m_index->dbi().getMachineType() != PDB_Machine::x86) + return mangled; + + CVType cvt = m_index->tpi().getType(func_ty.index); + PDB_CallingConv cc = PDB_CallingConv::NearC; + if (cvt.kind() == LF_PROCEDURE) { + ProcedureRecord proc; + if (llvm::Error error = + TypeDeserializer::deserializeAs<ProcedureRecord>(cvt, proc)) + llvm::consumeError(std::move(error)); + cc = proc.CallConv; + } else if (cvt.kind() == LF_MFUNCTION) { + MemberFunctionRecord mfunc; + if (llvm::Error error = + TypeDeserializer::deserializeAs<MemberFunctionRecord>(cvt, mfunc)) + llvm::consumeError(std::move(error)); + cc = mfunc.CallConv; + } else { + LLDB_LOG(GetLog(LLDBLog::Symbols), "Unexpected function type, got {0}", + cvt.kind()); + return mangled; + } + + if (cc == PDB_CallingConv::NearC || cc == PDB_CallingConv::FarC) + return mangled.drop_front(); + + return mangled; } void SymbolFileNativePDB::CacheUdtDeclarations() { diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h index 9249d190abfd7..998f834d77eaa 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h @@ -277,6 +277,9 @@ class SymbolFileNativePDB : public SymbolFileCommon { std::optional<llvm::StringRef> FindMangledSymbol(SegmentOffset so); + llvm::StringRef 
StripMangledFunctionName(llvm::StringRef mangled, + PdbTypeSymId func_ty); + llvm::BumpPtrAllocator m_allocator; lldb::addr_t m_obj_load_address = 0; diff --git a/lldb/unittests/SymbolFile/NativePDB/CMakeLists.txt b/lldb/unittests/SymbolFile/NativePDB/CMakeLists.txt index d0012568516c4..bfd74dd5050b4 100644 --- a/lldb/unittests/SymbolFile/NativePDB/CMakeLists.txt +++ b/lldb/unittests/SymbolFile/NativePDB/CMakeLists.txt @@ -1,6 +1,5 @@ add_lldb_unittest(SymbolFileNativePDBTests PdbFPOProgramToDWARFExpressionTests.cpp - PdbUtilTests.cpp UdtRecordCompleterTests.cpp LINK_COMPONENTS diff --git a/lldb/unittests/SymbolFile/NativePDB/PdbUtilTests.cpp b/lldb/unittests/SymbolFile/NativePDB/PdbUtilTests.cpp deleted file mode 100644 index 31ec9e88aa567..0000000000000 --- a/lldb/unittests/SymbolFile/NativePDB/PdbUtilTests.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Plugins/SymbolFile/NativePDB/PdbUtil.h" -#include "gtest/gtest.h" - -using namespace lldb_private::npdb; - -TEST(PdbUtil, StripCDeclPrefix) { - ASSERT_EQ(StripCDeclPrefix("main"), "main"); - - // __cdecl - ASSERT_EQ(StripCDeclPrefix("_main"), "main"); - ASSERT_EQ(StripCDeclPrefix("__main"), "_main"); - ASSERT_EQ(StripCDeclPrefix("_main@"), "main@"); - ASSERT_EQ(StripCDeclPrefix("_main@foo"), "main@foo"); - ASSERT_EQ(StripCDeclPrefix("_main@4@foo"), "main@4@foo"); - - // __stdcall - ASSERT_EQ(StripCDeclPrefix("_main@4"), "_main@4"); - ASSERT_EQ(StripCDeclPrefix("_main@foo@4"), "_main@foo@4"); - ASSERT_EQ(StripCDeclPrefix("_main@4@5"), "_main@4@5"); - - // __fastcall - ASSERT_EQ(StripCDeclPrefix("@main@4"), "@main@4"); - - // __vectorcall - ASSERT_EQ(StripCDeclPrefix("main@@4"), "main@@4"); - ASSERT_EQ(StripCDeclPrefix("_main@@4"), "_main@@4"); - - // MS C++ mangling - ASSERT_EQ(StripCDeclPrefix("?a@@YAHD@Z"), "?a@@YAHD@Z"); - // Itanium mangling (e.g. 
on MinGW) - ASSERT_EQ(StripCDeclPrefix("__Z7recursei"), "_Z7recursei"); - - ASSERT_EQ(StripCDeclPrefix("_"), ""); - ASSERT_EQ(StripCDeclPrefix("_@"), "@"); - ASSERT_EQ(StripCDeclPrefix(""), ""); - ASSERT_EQ(StripCDeclPrefix("_@4"), "_@4"); - ASSERT_EQ(StripCDeclPrefix("@4"), "@4"); - ASSERT_EQ(StripCDeclPrefix("@"), "@"); -} >From 3779ad3e85c2c5c0b94ccbca41fb4e01de7abdf5 Mon Sep 17 00:00:00 2001 From: Nerixyz <[email protected]> Date: Mon, 6 Oct 2025 13:50:25 +0200 Subject: [PATCH 14/14] fix: strip function name in `FindMangledSymbol` --- .../NativePDB/SymbolFileNativePDB.cpp | 25 +++++++++++-------- .../NativePDB/SymbolFileNativePDB.h | 11 +++++++- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index ac261d3dfc017..75a8189b2023a 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -502,10 +502,8 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id, PdbTypeSymId sig_id(proc.FunctionType, false); - std::optional<llvm::StringRef> mangled_opt = - FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); - if (mangled_opt) - mangled_opt = StripMangledFunctionName(*mangled_opt, proc.FunctionType); + std::optional<llvm::StringRef> mangled_opt = FindMangledSymbol( + SegmentOffset(proc.Segment, proc.CodeOffset), proc.FunctionType); Mangled mangled(mangled_opt.value_or(proc.Name)); FunctionSP func_sp = std::make_shared<Function>( @@ -2682,21 +2680,26 @@ SymbolFileNativePDB::FindMangledFunctionName(PdbCompilandSymId func_id) { ProcSym proc(static_cast<SymbolRecordKind>(sym_record.kind())); cantFail(SymbolDeserializer::deserializeAs<ProcSym>(sym_record, proc)); - std::optional<llvm::StringRef> mangled = - FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset)); - if (mangled) - mangled = 
StripMangledFunctionName(*mangled, proc.FunctionType); - return mangled; + return FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset), + proc.FunctionType); } std::optional<llvm::StringRef> -SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so) { +SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so, + TypeIndex function_type) { auto symbol = m_index->publics().findByAddress(m_index->symrecords(), so.segment, so.offset); if (!symbol) return std::nullopt; - return symbol->first.Name; + llvm::StringRef name = symbol->first.Name; + // For functions, we might need to strip the mangled name. See + // StripMangledFunctionName for more info. + if (!function_type.isNoneType() && + (symbol->first.Flags & PublicSymFlags::Function) != PublicSymFlags::None) + name = StripMangledFunctionName(name, function_type); + + return name; } llvm::StringRef diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h index 998f834d77eaa..a5fef354af65c 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h @@ -275,7 +275,16 @@ class SymbolFileNativePDB : public SymbolFileCommon { void CacheUdtDeclarations(); llvm::Expected<Declaration> ResolveUdtDeclaration(PdbTypeSymId type_id); - std::optional<llvm::StringRef> FindMangledSymbol(SegmentOffset so); + /// Find a symbol name at a specific address (`so`). + /// + /// \param[in] so The segment and offset where the symbol is located. + /// \param[in] function_type If the symbol is expected to be a function, this + /// has to be the type of the function. It's used to strip the name of + /// __cdecl functions on x86. + /// \returns The mangled symbol name if found, otherwise `std::nullopt`. 
+ std::optional<llvm::StringRef> FindMangledSymbol( + SegmentOffset so, + llvm::codeview::TypeIndex function_type = llvm::codeview::TypeIndex()); llvm::StringRef StripMangledFunctionName(llvm::StringRef mangled, PdbTypeSymId func_ty); _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
