Author: nerix Date: 2025-08-13T14:47:21+01:00 New Revision: 3f61e4eae65fcca0aaef4c726dd8f2ed6b473e7f
URL: https://github.com/llvm/llvm-project/commit/3f61e4eae65fcca0aaef4c726dd8f2ed6b473e7f DIFF: https://github.com/llvm/llvm-project/commit/3f61e4eae65fcca0aaef4c726dd8f2ed6b473e7f.diff LOG: [LLDB][NativePDB] Resolve declaration for tag types (#152579) Tag types like structs or enums didn't have a declaration attached to them. The source locations are present in the IPI stream in `LF_UDT_MOD_SRC_LINE` records: ``` 0x101F | LF_UDT_MOD_SRC_LINE [size = 18, hash = 0x1C63] udt = 0x1058, mod = 3, file = 1, line = 0 0x2789 | LF_UDT_MOD_SRC_LINE [size = 18, hash = 0x1E5A] udt = 0x1253, mod = 35, file = 93, line = 17069 ``` The file is an ID in the string table `/names`: ``` ID | String 1 | '\<unknown>' 12 | 'D:\a\_work\1\s\src\ExternalAPIs\WindowsSDKInc\c\Include\10.0.22621.0\um\wingdi.h' 93 | 'D:\a\_work\1\s\src\ExternalAPIs\WindowsSDKInc\c\Include\10.0.22621.0\um\winnt.h' ``` Here, we're not interested in `mod`. This would indicate which module contributed the UDT. I was looking at Rustc's PDB and found that it uses `<unknown>` for some types, so I added a check for that. This makes two DIA PDB shell tests work with the native PDB plugin.
--------- Co-authored-by: Michael Buch <michaelbuc...@gmail.com> Added: lldb/test/Shell/SymbolFile/NativePDB/unknown-udt-decl.ll Modified: lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h lldb/test/Shell/SymbolFile/PDB/class-layout.test lldb/test/Shell/SymbolFile/PDB/enums-layout.test Removed: ################################################################################ diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index 986d647b4de2d..337052fc6dbd0 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -644,8 +644,14 @@ SymbolFileNativePDB::CreateClassStructUnion(PdbTypeSymId type_id, std::string uname = GetUnqualifiedTypeName(record); - // FIXME: Search IPI stream for LF_UDT_MOD_SRC_LINE. + llvm::Expected maybeDecl = ResolveUdtDeclaration(type_id); Declaration decl; + if (maybeDecl) + decl = std::move(*maybeDecl); + else + LLDB_LOG_ERROR(GetLog(LLDBLog::Symbols), maybeDecl.takeError(), + "Failed to resolve declaration for '{1}': {0}", uname); + return MakeType(toOpaqueUid(type_id), ConstString(uname), size, nullptr, LLDB_INVALID_UID, Type::eEncodingIsUID, decl, ct, Type::ResolveState::Forward); @@ -668,7 +674,14 @@ lldb::TypeSP SymbolFileNativePDB::CreateTagType(PdbTypeSymId type_id, CompilerType ct) { std::string uname = GetUnqualifiedTypeName(er); + llvm::Expected maybeDecl = ResolveUdtDeclaration(type_id); Declaration decl; + if (maybeDecl) + decl = std::move(*maybeDecl); + else + LLDB_LOG_ERROR(GetLog(LLDBLog::Symbols), maybeDecl.takeError(), + "Failed to resolve declaration for '{1}': {0}", uname); + TypeSP underlying_type = GetOrCreateType(er.UnderlyingType); return MakeType( @@ -2556,3 +2569,70 @@ SymbolFileNativePDB::GetContextForType(TypeIndex ti) { } return ctx; } + +void 
SymbolFileNativePDB::CacheUdtDeclarations() { + for (CVType cvt : m_index->ipi().typeArray()) { + switch (cvt.kind()) { + case LF_UDT_SRC_LINE: { + UdtSourceLineRecord udt_src; + llvm::cantFail(TypeDeserializer::deserializeAs(cvt, udt_src)); + m_udt_declarations.try_emplace( + udt_src.UDT, UdtDeclaration{/*FileNameIndex=*/udt_src.SourceFile, + /*IsIpiIndex=*/true, + /*Line=*/udt_src.LineNumber}); + } break; + case LF_UDT_MOD_SRC_LINE: { + UdtModSourceLineRecord udt_mod_src; + llvm::cantFail(TypeDeserializer::deserializeAs(cvt, udt_mod_src)); + // Some types might be contributed by multiple modules. We assume that + // they all point to the same file and line because we can only provide + // one location. + m_udt_declarations.try_emplace( + udt_mod_src.UDT, + UdtDeclaration{/*FileNameIndex=*/udt_mod_src.SourceFile, + /*IsIpiIndex=*/false, + /*Line=*/udt_mod_src.LineNumber}); + } break; + default: + break; + } + } +} + +llvm::Expected<Declaration> +SymbolFileNativePDB::ResolveUdtDeclaration(PdbTypeSymId type_id) { + std::call_once(m_cached_udt_declarations, [this] { CacheUdtDeclarations(); }); + + auto it = m_udt_declarations.find(type_id.index); + if (it == m_udt_declarations.end()) + return llvm::createStringError("No UDT declaration found"); + + llvm::StringRef file_name; + if (it->second.IsIpiIndex) { + CVType cvt = m_index->ipi().getType(it->second.FileNameIndex); + if (cvt.kind() != LF_STRING_ID) + return llvm::createStringError("File name was not a LF_STRING_ID"); + + StringIdRecord sid; + llvm::cantFail(TypeDeserializer::deserializeAs(cvt, sid)); + file_name = sid.String; + } else { + // The file name index is an index into the string table + auto string_table = m_index->pdb().getStringTable(); + if (!string_table) + return string_table.takeError(); + + llvm::Expected<llvm::StringRef> string = + string_table->getStringTable().getString( + it->second.FileNameIndex.getIndex()); + if (!string) + return string.takeError(); + file_name = *string; + } + + // rustc 
sets the filename to "<unknown>" for some files + if (file_name == "\\<unknown>") + return Declaration(); + + return Declaration(FileSpec(file_name), it->second.Line); +} diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h index 6bbeb8bb14428..cfa00416d9673 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h @@ -262,6 +262,9 @@ class SymbolFileNativePDB : public SymbolFileCommon { void CacheFunctionNames(); + void CacheUdtDeclarations(); + llvm::Expected<Declaration> ResolveUdtDeclaration(PdbTypeSymId type_id); + llvm::BumpPtrAllocator m_allocator; lldb::addr_t m_obj_load_address = 0; @@ -283,6 +286,18 @@ class SymbolFileNativePDB : public SymbolFileCommon { llvm::DenseMap<llvm::codeview::TypeIndex, llvm::codeview::TypeIndex> m_parent_types; + struct UdtDeclaration { + /// This could either be an index into the `/names` section (string table, + /// LF_UDT_MOD_SRC_LINE) or, this could be an index into the IPI stream to a + /// LF_STRING_ID record (LF_UDT_SRC_LINE). + llvm::codeview::TypeIndex FileNameIndex; + bool IsIpiIndex; + + uint32_t Line; + }; + llvm::DenseMap<llvm::codeview::TypeIndex, UdtDeclaration> m_udt_declarations; + std::once_flag m_cached_udt_declarations; + lldb_private::UniqueCStringMap<uint32_t> m_type_base_names; /// mangled name/full function name -> Global ID(s) diff --git a/lldb/test/Shell/SymbolFile/NativePDB/unknown-udt-decl.ll b/lldb/test/Shell/SymbolFile/NativePDB/unknown-udt-decl.ll new file mode 100644 index 0000000000000..af787897dd8cf --- /dev/null +++ b/lldb/test/Shell/SymbolFile/NativePDB/unknown-udt-decl.ll @@ -0,0 +1,56 @@ +; Test that the declaration for UDTs won't be "<unknown>" or "\<unknown>". +; Rustc sets the location of some builtin types to this string. 
+ +; REQUIRES: system-windows +; RUN: %build --compiler=clang-cl --nodefaultlib -o %t.exe -- %s +; RUN: lldb-test symbols %t.exe | FileCheck %s + +; there shouldn't be a declaration (would be between size and compiler_type) +; CHECK: Type{{.*}} , name = "Foo", size = 1, compiler_type = {{.*}} struct Foo { + +; This is edited output from clang simulates rustc behavior (see !17) +; Source: +; struct Foo {}; +; +; int main() { Foo f; } + + +; ModuleID = 'main.cpp' +source_filename = "main.cpp" +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.44.35207" + +%struct.Foo = type { i8 } + +; Function Attrs: mustprogress noinline norecurse nounwind optnone uwtable +define dso_local noundef i32 @main() #0 !dbg !9 { + %1 = alloca %struct.Foo, align 1 + #dbg_declare(ptr %1, !14, !DIExpression(), !16) + ret i32 0, !dbg !16 +} + +attributes #0 = { mustprogress noinline norecurse nounwind optnone uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.1.6", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "main.cpp", directory: "F:\\Dev\\rust-dbg-test", checksumkind: CSK_MD5, checksum: "b8942260dadf9ec35328889f05afb954") +!2 = !{i32 2, !"CodeView", i32 1} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 2} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"uwtable", i32 2} +!7 = !{i32 1, !"MaxTLSAlign", i32 65536} +!8 = !{!"clang version 20.1.6"} +!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !10, 
scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !13) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{} +!14 = !DILocalVariable(name: "f", scope: !9, file: !1, line: 3, type: !15) +!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !17, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !13, identifier: ".?AUFoo@@") +!16 = !DILocation(line: 3, scope: !9) +; This is how rustc emits some types +!17 = !DIFile(filename: "<unknown>", directory: "") diff --git a/lldb/test/Shell/SymbolFile/PDB/class-layout.test b/lldb/test/Shell/SymbolFile/PDB/class-layout.test index e9a7d1c0daa9e..eca910e997e40 100644 --- a/lldb/test/Shell/SymbolFile/PDB/class-layout.test +++ b/lldb/test/Shell/SymbolFile/PDB/class-layout.test @@ -12,9 +12,19 @@ RUN: lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix RUN: lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=BASE %s RUN: lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=FRIEND %s RUN: lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=CLASS %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=ENUM %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=UNION %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=STRUCT %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=COMPLEX %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=LIST %s +RUN: env 
LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=UNNAMED-STRUCT %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=BASE %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=FRIEND %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=CLASS %s CHECK: Module [[MOD:.*]] -CHECK: SymbolFile pdb ([[MOD]]) +CHECK: SymbolFile {{(native-)?}}pdb ([[MOD]]) CHECK: {{^[0-9A-F]+}}: CompileUnit{{[{]0x[0-9a-f]+[}]}}, language = "c++", file = '{{.*}}\ClassLayoutTest.cpp' ENUM: name = "Enum", size = 4, decl = ClassLayoutTest.cpp:5 diff --git a/lldb/test/Shell/SymbolFile/PDB/enums-layout.test b/lldb/test/Shell/SymbolFile/PDB/enums-layout.test index 6f861c6d65adf..9766d6f8b0324 100644 --- a/lldb/test/Shell/SymbolFile/PDB/enums-layout.test +++ b/lldb/test/Shell/SymbolFile/PDB/enums-layout.test @@ -7,6 +7,12 @@ RUN: lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check- RUN: lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=UCHAR-ENUM %s RUN: lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=CLASS-ENUM %s RUN: lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=STRUCT-ENUM %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=ENUM %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=CONST-ENUM %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=EMPTY-ENUM %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=UCHAR-ENUM %s +RUN: env 
LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=CLASS-ENUM %s +RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=STRUCT-ENUM %s ; FIXME: PDB does not have information about scoped enumeration (Enum class) so the ; compiler type used is the same as the one for unscoped enumeration. _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits