[Lldb-commits] [lldb] 3f61e4e - [LLDB][NativePDB] Resolve declaration for tag types (#152579)
via lldb-commits
lldb-commits at lists.llvm.org
Wed Aug 13 06:47:24 PDT 2025
Author: nerix
Date: 2025-08-13T14:47:21+01:00
New Revision: 3f61e4eae65fcca0aaef4c726dd8f2ed6b473e7f
URL: https://github.com/llvm/llvm-project/commit/3f61e4eae65fcca0aaef4c726dd8f2ed6b473e7f
DIFF: https://github.com/llvm/llvm-project/commit/3f61e4eae65fcca0aaef4c726dd8f2ed6b473e7f.diff
LOG: [LLDB][NativePDB] Resolve declaration for tag types (#152579)
Tag types like structs or enums didn't have a declaration attached to
them. The source locations are present in the IPI stream in
`LF_UDT_MOD_SRC_LINE` records:
```
0x101F | LF_UDT_MOD_SRC_LINE [size = 18, hash = 0x1C63]
udt = 0x1058, mod = 3, file = 1, line = 0
0x2789 | LF_UDT_MOD_SRC_LINE [size = 18, hash = 0x1E5A]
udt = 0x1253, mod = 35, file = 93, line = 17069
```
The file is an ID in the string table `/names`:
```
ID | String
1 | '\<unknown>'
12 | 'D:\a\_work\1\s\src\ExternalAPIs\WindowsSDKInc\c\Include\10.0.22621.0\um\wingdi.h'
93 | 'D:\a\_work\1\s\src\ExternalAPIs\WindowsSDKInc\c\Include\10.0.22621.0\um\winnt.h'
```
Here, we're not interested in `mod`, which only indicates which module
contributed the UDT.
I was looking at Rustc's PDB and found that it uses `<unknown>` for some
types, so I added a check for that.
This makes two DIA PDB shell tests work with the native PDB plugin.
---------
Co-authored-by: Michael Buch <michaelbuch12 at gmail.com>
Added:
lldb/test/Shell/SymbolFile/NativePDB/unknown-udt-decl.ll
Modified:
lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
lldb/test/Shell/SymbolFile/PDB/class-layout.test
lldb/test/Shell/SymbolFile/PDB/enums-layout.test
Removed:
################################################################################
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index 986d647b4de2d..337052fc6dbd0 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -644,8 +644,14 @@ SymbolFileNativePDB::CreateClassStructUnion(PdbTypeSymId type_id,
std::string uname = GetUnqualifiedTypeName(record);
- // FIXME: Search IPI stream for LF_UDT_MOD_SRC_LINE.
+ llvm::Expected maybeDecl = ResolveUdtDeclaration(type_id);
Declaration decl;
+ if (maybeDecl)
+ decl = std::move(*maybeDecl);
+ else
+ LLDB_LOG_ERROR(GetLog(LLDBLog::Symbols), maybeDecl.takeError(),
+ "Failed to resolve declaration for '{1}': {0}", uname);
+
return MakeType(toOpaqueUid(type_id), ConstString(uname), size, nullptr,
LLDB_INVALID_UID, Type::eEncodingIsUID, decl, ct,
Type::ResolveState::Forward);
@@ -668,7 +674,14 @@ lldb::TypeSP SymbolFileNativePDB::CreateTagType(PdbTypeSymId type_id,
CompilerType ct) {
std::string uname = GetUnqualifiedTypeName(er);
+ llvm::Expected maybeDecl = ResolveUdtDeclaration(type_id);
Declaration decl;
+ if (maybeDecl)
+ decl = std::move(*maybeDecl);
+ else
+ LLDB_LOG_ERROR(GetLog(LLDBLog::Symbols), maybeDecl.takeError(),
+ "Failed to resolve declaration for '{1}': {0}", uname);
+
TypeSP underlying_type = GetOrCreateType(er.UnderlyingType);
return MakeType(
@@ -2556,3 +2569,70 @@ SymbolFileNativePDB::GetContextForType(TypeIndex ti) {
}
return ctx;
}
+
+void SymbolFileNativePDB::CacheUdtDeclarations() {
+ for (CVType cvt : m_index->ipi().typeArray()) {
+ switch (cvt.kind()) {
+ case LF_UDT_SRC_LINE: {
+ UdtSourceLineRecord udt_src;
+ llvm::cantFail(TypeDeserializer::deserializeAs(cvt, udt_src));
+ m_udt_declarations.try_emplace(
+ udt_src.UDT, UdtDeclaration{/*FileNameIndex=*/udt_src.SourceFile,
+ /*IsIpiIndex=*/true,
+ /*Line=*/udt_src.LineNumber});
+ } break;
+ case LF_UDT_MOD_SRC_LINE: {
+ UdtModSourceLineRecord udt_mod_src;
+ llvm::cantFail(TypeDeserializer::deserializeAs(cvt, udt_mod_src));
+ // Some types might be contributed by multiple modules. We assume that
+ // they all point to the same file and line because we can only provide
+ // one location.
+ m_udt_declarations.try_emplace(
+ udt_mod_src.UDT,
+ UdtDeclaration{/*FileNameIndex=*/udt_mod_src.SourceFile,
+ /*IsIpiIndex=*/false,
+ /*Line=*/udt_mod_src.LineNumber});
+ } break;
+ default:
+ break;
+ }
+ }
+}
+
+llvm::Expected<Declaration>
+SymbolFileNativePDB::ResolveUdtDeclaration(PdbTypeSymId type_id) {
+ std::call_once(m_cached_udt_declarations, [this] { CacheUdtDeclarations(); });
+
+ auto it = m_udt_declarations.find(type_id.index);
+ if (it == m_udt_declarations.end())
+ return llvm::createStringError("No UDT declaration found");
+
+ llvm::StringRef file_name;
+ if (it->second.IsIpiIndex) {
+ CVType cvt = m_index->ipi().getType(it->second.FileNameIndex);
+ if (cvt.kind() != LF_STRING_ID)
+ return llvm::createStringError("File name was not a LF_STRING_ID");
+
+ StringIdRecord sid;
+ llvm::cantFail(TypeDeserializer::deserializeAs(cvt, sid));
+ file_name = sid.String;
+ } else {
+ // The file name index is an index into the string table
+ auto string_table = m_index->pdb().getStringTable();
+ if (!string_table)
+ return string_table.takeError();
+
+ llvm::Expected<llvm::StringRef> string =
+ string_table->getStringTable().getString(
+ it->second.FileNameIndex.getIndex());
+ if (!string)
+ return string.takeError();
+ file_name = *string;
+ }
+
+ // rustc sets the filename to "<unknown>" for some files
+ if (file_name == "\\<unknown>")
+ return Declaration();
+
+ return Declaration(FileSpec(file_name), it->second.Line);
+}
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
index 6bbeb8bb14428..cfa00416d9673 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
@@ -262,6 +262,9 @@ class SymbolFileNativePDB : public SymbolFileCommon {
void CacheFunctionNames();
+ void CacheUdtDeclarations();
+ llvm::Expected<Declaration> ResolveUdtDeclaration(PdbTypeSymId type_id);
+
llvm::BumpPtrAllocator m_allocator;
lldb::addr_t m_obj_load_address = 0;
@@ -283,6 +286,18 @@ class SymbolFileNativePDB : public SymbolFileCommon {
llvm::DenseMap<llvm::codeview::TypeIndex, llvm::codeview::TypeIndex>
m_parent_types;
+ struct UdtDeclaration {
+ /// This could either be an index into the `/names` section (string table,
+ /// LF_UDT_MOD_SRC_LINE) or, this could be an index into the IPI stream to a
+ /// LF_STRING_ID record (LF_UDT_SRC_LINE).
+ llvm::codeview::TypeIndex FileNameIndex;
+ bool IsIpiIndex;
+
+ uint32_t Line;
+ };
+ llvm::DenseMap<llvm::codeview::TypeIndex, UdtDeclaration> m_udt_declarations;
+ std::once_flag m_cached_udt_declarations;
+
lldb_private::UniqueCStringMap<uint32_t> m_type_base_names;
/// mangled name/full function name -> Global ID(s)
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/unknown-udt-decl.ll b/lldb/test/Shell/SymbolFile/NativePDB/unknown-udt-decl.ll
new file mode 100644
index 0000000000000..af787897dd8cf
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/NativePDB/unknown-udt-decl.ll
@@ -0,0 +1,56 @@
+; Test that the declaration for UDTs won't be "<unknown>" or "\<unknown>".
+; Rustc sets the location of some builtin types to this string.
+
+; REQUIRES: system-windows
+; RUN: %build --compiler=clang-cl --nodefaultlib -o %t.exe -- %s
+; RUN: lldb-test symbols %t.exe | FileCheck %s
+
+; there shouldn't be a declaration (would be between size and compiler_type)
+; CHECK: Type{{.*}} , name = "Foo", size = 1, compiler_type = {{.*}} struct Foo {
+
+; This is edited output from clang simulates rustc behavior (see !17)
+; Source:
+; struct Foo {};
+;
+; int main() { Foo f; }
+
+
+; ModuleID = 'main.cpp'
+source_filename = "main.cpp"
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.44.35207"
+
+%struct.Foo = type { i8 }
+
+; Function Attrs: mustprogress noinline norecurse nounwind optnone uwtable
+define dso_local noundef i32 @main() #0 !dbg !9 {
+ %1 = alloca %struct.Foo, align 1
+ #dbg_declare(ptr %1, !14, !DIExpression(), !16)
+ ret i32 0, !dbg !16
+}
+
+attributes #0 = { mustprogress noinline norecurse nounwind optnone uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 20.1.6", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "main.cpp", directory: "F:\\Dev\\rust-dbg-test", checksumkind: CSK_MD5, checksum: "b8942260dadf9ec35328889f05afb954")
+!2 = !{i32 2, !"CodeView", i32 1}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 2}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"uwtable", i32 2}
+!7 = !{i32 1, !"MaxTLSAlign", i32 65536}
+!8 = !{!"clang version 20.1.6"}
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !10, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !13)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !{}
+!14 = !DILocalVariable(name: "f", scope: !9, file: !1, line: 3, type: !15)
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !17, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !13, identifier: ".?AUFoo@@")
+!16 = !DILocation(line: 3, scope: !9)
+; This is how rustc emits some types
+!17 = !DIFile(filename: "<unknown>", directory: "")
diff --git a/lldb/test/Shell/SymbolFile/PDB/class-layout.test b/lldb/test/Shell/SymbolFile/PDB/class-layout.test
index e9a7d1c0daa9e..eca910e997e40 100644
--- a/lldb/test/Shell/SymbolFile/PDB/class-layout.test
+++ b/lldb/test/Shell/SymbolFile/PDB/class-layout.test
@@ -12,9 +12,19 @@ RUN: lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix
RUN: lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=BASE %s
RUN: lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=FRIEND %s
RUN: lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=CLASS %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=ENUM %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=UNION %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=STRUCT %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=COMPLEX %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=LIST %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=UNNAMED-STRUCT %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=BASE %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=FRIEND %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/ClassLayoutTest.cpp.exe | FileCheck --check-prefix=CLASS %s
CHECK: Module [[MOD:.*]]
-CHECK: SymbolFile pdb ([[MOD]])
+CHECK: SymbolFile {{(native-)?}}pdb ([[MOD]])
CHECK: {{^[0-9A-F]+}}: CompileUnit{{[{]0x[0-9a-f]+[}]}}, language = "c++", file = '{{.*}}\ClassLayoutTest.cpp'
ENUM: name = "Enum", size = 4, decl = ClassLayoutTest.cpp:5
diff --git a/lldb/test/Shell/SymbolFile/PDB/enums-layout.test b/lldb/test/Shell/SymbolFile/PDB/enums-layout.test
index 6f861c6d65adf..9766d6f8b0324 100644
--- a/lldb/test/Shell/SymbolFile/PDB/enums-layout.test
+++ b/lldb/test/Shell/SymbolFile/PDB/enums-layout.test
@@ -7,6 +7,12 @@ RUN: lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-
RUN: lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=UCHAR-ENUM %s
RUN: lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=CLASS-ENUM %s
RUN: lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=STRUCT-ENUM %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=ENUM %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=CONST-ENUM %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=EMPTY-ENUM %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=UCHAR-ENUM %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=CLASS-ENUM %s
+RUN: env LLDB_USE_NATIVE_PDB_READER=1 lldb-test symbols %t.dir/SimpleTypesTest.cpp.enums.exe | FileCheck --check-prefix=STRUCT-ENUM %s
; FIXME: PDB does not have information about scoped enumeration (Enum class) so the
; compiler type used is the same as the one for unscoped enumeration.
More information about the lldb-commits
mailing list