[llvm] [PDB] Add public symbol lookup by address (PR #157361)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 9 09:03:29 PDT 2025


https://github.com/Nerixyz updated https://github.com/llvm/llvm-project/pull/157361

>From 76a14bb93bbc76b773e2e69dac9d4c435c728c5a Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Sun, 7 Sep 2025 20:30:27 +0200
Subject: [PATCH 1/8] [PDB] Add public symbol lookup by address

---
 .../llvm/DebugInfo/PDB/Native/PublicsStream.h |  18 ++++
 .../DebugInfo/PDB/Native/PublicsStream.cpp    |  91 ++++++++++++++++++
 llvm/unittests/DebugInfo/PDB/CMakeLists.txt   |   1 +
 .../DebugInfo/PDB/Inputs/PublicSymbols.cpp    |  46 +++++++++
 .../DebugInfo/PDB/Inputs/PublicSymbols.pdb    | Bin 0 -> 53248 bytes
 .../DebugInfo/PDB/PublicsStreamTest.cpp       |  62 ++++++++++++
 6 files changed, 218 insertions(+)
 create mode 100644 llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp
 create mode 100644 llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb
 create mode 100644 llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp

diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
index 2cb4bee8ca5df..c5fdad057e867 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
@@ -18,9 +18,13 @@ namespace llvm {
 namespace msf {
 class MappedBlockStream;
 }
+namespace codeview {
+class PublicSym32;
+}
 namespace pdb {
 struct PublicsStreamHeader;
 struct SectionOffset;
+class SymbolStream;
 
 class PublicsStream {
 public:
@@ -42,6 +46,20 @@ class PublicsStream {
     return SectionOffsets;
   }
 
+  /// Find a public symbol by a segment and offset.
+  ///
+  /// In case there is more than one symbol (for example due to ICF), the first
+  /// one is returned.
+  ///
+  /// \return If a symbol was found, the symbol at the provided address is
+  ///     returned as well as the index of this symbol in the address map. If
+  ///     the binary was linked with ICF, there might be more symbols with the
+  ///     same address after the returned one. If no symbol is found,
+  ///     `std::nullopt` is returned.
+  LLVM_ABI std::optional<std::pair<codeview::PublicSym32, size_t>>
+  findByAddress(const SymbolStream &Symbols, uint16_t Segment,
+                uint32_t Offset) const;
+
 private:
   std::unique_ptr<msf::MappedBlockStream> Stream;
   GSIHashTable PublicsTable;
diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index c350e0e0b3e19..984e6e70adba2 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -22,9 +22,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Error.h"
 #include <cstdint>
@@ -96,3 +99,91 @@ Error PublicsStream::reload() {
                                 "Corrupted publics stream.");
   return Error::success();
 }
+
+static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffset,
+                                     uint16_t RhsSegment, uint32_t RhsOffset) {
+  if (LhsSegment == RhsSegment)
+    return LhsOffset - RhsOffset;
+  return LhsSegment - RhsSegment;
+}
+
+static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffst,
+                                     const codeview::PublicSym32 &Rhs) {
+  return compareSegmentOffset(LhsSegment, LhsOffst, Rhs.Segment, Rhs.Offset);
+}
+
+// This is a reimplementation of NearestSym:
+// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
+std::optional<std::pair<codeview::PublicSym32, size_t>>
+PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
+                             uint32_t Offset) const {
+  // The address map is sorted by address, so we do binary search.
+  // Each element is an offset into the symbols for a public symbol.
+  auto Lo = AddressMap.begin();
+  auto Hi = AddressMap.end();
+  Hi -= 1;
+
+  while (Lo < Hi) {
+    auto Cur = Lo + ((Hi - Lo + 1) / 2);
+    auto Sym = Symbols.readRecord(Cur->value());
+    if (Sym.kind() != codeview::S_PUB32)
+      return std::nullopt; // this is most likely corrupted debug info
+
+    auto Psym =
+        codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
+    if (!Psym) {
+      consumeError(Psym.takeError());
+      return std::nullopt;
+    }
+
+    uint32_t Cmp = compareSegmentOffset(Segment, Offset, *Psym);
+    if (Cmp < 0) {
+      Cur -= 1;
+      Hi = Cur;
+    } else if (Cmp == 0)
+      Lo = Hi = Cur;
+    else
+      Lo = Cur;
+  }
+
+  auto Sym = Symbols.readRecord(Lo->value());
+  if (Sym.kind() != codeview::S_PUB32)
+    return std::nullopt; // this is most likely corrupted debug info
+
+  auto MaybePsym =
+      codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
+  if (!MaybePsym) {
+    consumeError(MaybePsym.takeError());
+    return std::nullopt;
+  }
+  codeview::PublicSym32 Psym = std::move(*MaybePsym);
+
+  uint32_t Cmp = compareSegmentOffset(Segment, Offset, Psym);
+  if (Cmp != 0)
+    return std::nullopt;
+
+  // We found a symbol. Due to ICF, multiple symbols can have the same
+  // address, so return the first one
+  while (Lo != AddressMap.begin()) {
+    --Lo;
+    Sym = Symbols.readRecord(Lo->value());
+    if (Sym.kind() != codeview::S_PUB32)
+      return std::nullopt;
+    MaybePsym =
+        codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
+    if (!MaybePsym) {
+      consumeError(MaybePsym.takeError());
+      return std::nullopt;
+    }
+
+    if (MaybePsym->Segment != Segment || MaybePsym->Offset != Offset) {
+      ++Lo;
+      break;
+    }
+
+    Psym = std::move(*MaybePsym);
+  }
+
+  std::ptrdiff_t IterOffset = Lo - AddressMap.begin();
+  return std::pair{Psym, static_cast<size_t>(IterOffset)};
+}
diff --git a/llvm/unittests/DebugInfo/PDB/CMakeLists.txt b/llvm/unittests/DebugInfo/PDB/CMakeLists.txt
index ba2a732848f4d..b1b9d2d98c944 100644
--- a/llvm/unittests/DebugInfo/PDB/CMakeLists.txt
+++ b/llvm/unittests/DebugInfo/PDB/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_unittest_with_input_files(DebugInfoPDBTests
   StringTableBuilderTest.cpp
   PDBApiTest.cpp
   PDBVariantTest.cpp
+  PublicsStreamTest.cpp
   )
 
 target_link_libraries(DebugInfoPDBTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp
new file mode 100644
index 0000000000000..0aeab04543caf
--- /dev/null
+++ b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp
@@ -0,0 +1,46 @@
+// clang-format off
+
+// Compile with
+// cl /Z7 /GR- /GS- PublicSymbols.cpp -c /Gy
+// link .\PublicSymbols.obj /DEBUG /NODEFAULTLIB /out:PublicSymbols.exe /ENTRY:main /OPT:ICF
+// llvm-pdbutil pdb2yaml --publics-stream PublicSymbols.pdb > PublicSymbols.yaml
+// llvm-pdbutil yaml2pdb PublicSymbols.yaml
+// 
+// rm PublicSymbols.exe && rm PublicSymbols.obj && rm PublicSymbols.yaml
+
+int foobar(int i){ return i + 1; }
+// these should be merged with ICF
+int dup1(int i){ return i + 2; }
+int dup2(int i){ return i + 2; }
+int dup3(int i){ return i + 2; }
+
+class AClass {
+public:
+    void AMethod(int, char*) {}
+    static bool Something(char c) {
+        return c == ' ';
+    }
+};
+
+struct Base {
+    virtual ~Base() = default;
+};
+struct Derived : public Base {};
+struct Derived2 : public Base {};
+struct Derived3 : public Derived2, public Derived {};
+
+int AGlobal;
+
+void operator delete(void *,unsigned __int64) {}
+
+int main() {
+    foobar(1);
+    dup1(1);
+    dup2(1);
+    dup3(1);
+    AClass a;
+    a.AMethod(1, nullptr);
+    AClass::Something(' ');
+    Derived3 d3;
+    return AGlobal;
+}
diff --git a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb
new file mode 100644
index 0000000000000000000000000000000000000000..ffa3275d58d7b77cb6aad0b89ffb0dceeb1ad39e
GIT binary patch
literal 53248
zcmeI*PiP!v9LDk4G-}g75~WC*gN!1gR+?P{O#(t at la1M8A}MKUDnhcF#4K)hC7Y at T
z4>@@7(1HgK3LfIkgMx>8_n;SvptpL9;8AG>3m$6q`OUnuvzg6e2(%{g`@*yD?!3Qw
z_npsf4zrs~Cd;p_)atdxRe!W^^wcSTa{2{-;B+>7{77$M+EeTHybg88l?i*Sb|~t&
zMF0T=5I_I{1Q0*~0R#|00D*lX at HY(s1Q0*~0R#|0009ILKmY**4!nT3dEgI`14IA;
z1Q0*~0R#|0009ILK;VBbaBQFao&w86009ILKmY**5I_I{1Q0*~f!zz(2Ozy&eZSib
zo%V<RGz1Vp009ILKmY**5I_I{1Q0lI0`Kc$e($z9s`#^kKQ+Fl&jyxNo6-HNx^I2S
z^+TP}<L|Z6wm<FaK6Q^TyZ&U{yZW+ft^StlhurvAZeAO+eCy`>%k at 9I{x?_uqpSPE
z^$)rIKX!Fj-S#23{hZtWlY4x@&GVS68*_ce)#+i<0zbKL_q6X$ny>39M$M1!?6TkW
z{%iJxhwUb5J6pZ2T~_T{u;#6`8~fMoc45iX-g<ka*SXC!$$}hhu(c=}_qT3rdeqVO
z*rVr9=8t!>-<9c6o=~#N^GZ=UugobG<sIcc<pbpt<um1)^0l&|TvvWkZYsBx!&=1&
z#aEtH29<&`rM#jnDb{>tNhv5-l+TqvlpmBH)y!$RzpB5czFWDXTvwjbdw5X2U9_{J
ze58D%Y$~^vo=%NhwC{OkLs?bUl_`zW5I_I{1Q0*~0R#|0;NS>6>OE+u%VEoS!^7E;
zQhgx^&W+{s!OY8^_qZGPRS}LC7FNn{EzCc+P0duQrhl87^gK-^Uc8odH%~5F%Z2n>
zzKq9{wY1dOd6BI*=W4W;mYVcDO(l)BXp(TA{@_x6W@;=yGy7s77Oz(>>cZBc=b0Ud
z7JFtS2=}!CJJ*iJl7-s%Gj8AR^tcvzW+bT%G#=O5dFPh7nrnl=O4~oLO{<E=Yw3<@
zb}Rw5Lp!S at d(yq$bB&s0JqDPrTTj<H=(x_-Z||>T+tghgXqr$pZJ#@OVd5!Uk7IlX
z$1%Rc<B020$I-I3xGuG}=J&-RZ#X|*sl8gN1VMi>AHJ4mu^Cz9Cl^+iYV$#Uv{I_q
zqaTEsqK>kl*CBSh8P!hLmQ`G?zTT*w&d<JNg=+SkHm|Q6z31mIznKg4b}be)+&ZS8
z+_qynXg3!BEM}uQ7i&6<mBy&&J?x${2*;O8<*KcOh5!NxAb<b at 2q1s}0()8D=y$oy
zpe at i&MlEXgy}!N^RL>WHAqY1JAb<b at 2q1s}0tg_000Ic?a{)Wof4eJ}(btUL{mKK1
zeQ({|?6ZKlJ)%1&m5euGyM*n|jXO8&LAar at RU2&b))wu=EdmH2fB*srAb<b at 2q1s}
z0tnnQfwt%F_iP0`5dsJxfB*srAb<b at 2q1s}0tnn~0e=7gZVNL%0tg_000IagfB*sr
zAb<b@|GmJ6r*n;8+uukvyTD!FM7jFL+3I4=>#LTQ7wWnQo&HgvVtd&i3|Kp&bcOdk
z?>^VlA%Fk^2q1s}0tg_000IagfWV#+uv2$ymlV4WVBPiVyc0Ut_jRs6q1fO2A64S(
p1K57{F at T-!+s6R$>A&vM5I_I{1Q0*~0R#|0009ILKwysv`~zW}Izs>e

literal 0
HcmV?d00001

diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
new file mode 100644
index 0000000000000..bac4901073cd0
--- /dev/null
+++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
@@ -0,0 +1,62 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#include "llvm/Testing/Support/SupportHelpers.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+extern const char *TestMainArgv0;
+
+static std::string getExePath() {
+  SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
+  llvm::sys::path::append(InputsDir, "PublicSymbols.pdb");
+  return std::string(InputsDir);
+}
+
+TEST(PublicsStreamTest, FindByAddress) {
+  std::string ExePath = getExePath();
+  auto Buffer = MemoryBuffer::getFile(ExePath, /*IsText=*/false,
+                                      /*RequiresNullTerminator=*/false);
+  ASSERT_TRUE(bool(Buffer));
+  auto Stream = std::make_unique<MemoryBufferByteStream>(
+      std::move(*Buffer), llvm::endianness::little);
+
+  BumpPtrAllocator Alloc;
+  PDBFile File(ExePath, std::move(Stream), Alloc);
+  ASSERT_FALSE(bool(File.parseFileHeaders()));
+  ASSERT_FALSE(bool(File.parseStreamData()));
+
+  auto Publics = File.getPDBPublicsStream();
+  ASSERT_TRUE(bool(Publics));
+  auto Symbols = File.getPDBSymbolStream();
+  ASSERT_TRUE(bool(Symbols));
+
+  auto VTableDerived = Publics->findByAddress(*Symbols, 2, 8);
+  ASSERT_TRUE(VTableDerived.has_value());
+  // both derived and derived2 have their vftables there - but derived2 is first
+  // (due to ICF)
+  ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@");
+  ASSERT_EQ(VTableDerived->second, 26);
+
+  ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value());
+  ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());
+
+  auto GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
+  ASSERT_TRUE(GlobalSym.has_value());
+  ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA");
+  ASSERT_EQ(GlobalSym->second, 30);
+}

>From ce02ae15f2d7fe06bcc2c2f5459f8cedcb1edb3e Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Sun, 7 Sep 2025 22:31:13 +0200
Subject: [PATCH 2/8] fix: make unsigned

---
 llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
index bac4901073cd0..0aa6a95001b7b 100644
--- a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
+++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
@@ -50,7 +50,7 @@ TEST(PublicsStreamTest, FindByAddress) {
   // both derived and derived2 have their vftables there - but derived2 is first
   // (due to ICF)
   ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@");
-  ASSERT_EQ(VTableDerived->second, 26);
+  ASSERT_EQ(VTableDerived->second, 26u);
 
   ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value());
   ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());
@@ -58,5 +58,5 @@ TEST(PublicsStreamTest, FindByAddress) {
   auto GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
   ASSERT_TRUE(GlobalSym.has_value());
   ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA");
-  ASSERT_EQ(GlobalSym->second, 30);
+  ASSERT_EQ(GlobalSym->second, 30u);
 }

>From 51fde81d412d2aac6052e6b51fa7f3b547b6bdf5 Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Mon, 8 Sep 2025 11:09:36 +0200
Subject: [PATCH 3/8] fix: use lower_bound to find the element

---
 .../DebugInfo/PDB/Native/PublicsStream.cpp    | 78 +++++++------------
 1 file changed, 26 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index 984e6e70adba2..8827bc4a5b329 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -117,36 +117,32 @@ static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffst,
 std::optional<std::pair<codeview::PublicSym32, size_t>>
 PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
                              uint32_t Offset) const {
-  // The address map is sorted by address, so we do binary search.
-  // Each element is an offset into the symbols for a public symbol.
-  auto Lo = AddressMap.begin();
-  auto Hi = AddressMap.end();
-  Hi -= 1;
-
-  while (Lo < Hi) {
-    auto Cur = Lo + ((Hi - Lo + 1) / 2);
-    auto Sym = Symbols.readRecord(Cur->value());
-    if (Sym.kind() != codeview::S_PUB32)
-      return std::nullopt; // this is most likely corrupted debug info
-
-    auto Psym =
-        codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
-    if (!Psym) {
-      consumeError(Psym.takeError());
-      return std::nullopt;
-    }
-
-    uint32_t Cmp = compareSegmentOffset(Segment, Offset, *Psym);
-    if (Cmp < 0) {
-      Cur -= 1;
-      Hi = Cur;
-    } else if (Cmp == 0)
-      Lo = Hi = Cur;
-    else
-      Lo = Cur;
-  }
+  // The address map is sorted by address, so we can use lower_bound to find the
+  // position. Each element is an offset into the symbols for a public symbol.
+  auto It = llvm::lower_bound(
+      AddressMap, std::pair(Segment, Offset),
+      [&](support::ulittle32_t Cur, auto Addr) {
+        auto Sym = Symbols.readRecord(Cur.value());
+        if (Sym.kind() != codeview::S_PUB32)
+          return false; // stop here, this is most likely corrupted debug info
+
+        auto Psym =
+            codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(
+                Sym);
+        if (!Psym) {
+          consumeError(Psym.takeError());
+          return false;
+        }
+
+        if (Psym->Segment == Addr.first)
+          return Psym->Offset < Addr.second;
+        return Psym->Segment < Addr.first;
+      });
+
+  if (It == AddressMap.end())
+    return std::nullopt;
 
-  auto Sym = Symbols.readRecord(Lo->value());
+  auto Sym = Symbols.readRecord(It->value());
   if (Sym.kind() != codeview::S_PUB32)
     return std::nullopt; // this is most likely corrupted debug info
 
@@ -162,28 +158,6 @@ PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
   if (Cmp != 0)
     return std::nullopt;
 
-  // We found a symbol. Due to ICF, multiple symbols can have the same
-  // address, so return the first one
-  while (Lo != AddressMap.begin()) {
-    --Lo;
-    Sym = Symbols.readRecord(Lo->value());
-    if (Sym.kind() != codeview::S_PUB32)
-      return std::nullopt;
-    MaybePsym =
-        codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
-    if (!MaybePsym) {
-      consumeError(MaybePsym.takeError());
-      return std::nullopt;
-    }
-
-    if (MaybePsym->Segment != Segment || MaybePsym->Offset != Offset) {
-      ++Lo;
-      break;
-    }
-
-    Psym = std::move(*MaybePsym);
-  }
-
-  std::ptrdiff_t IterOffset = Lo - AddressMap.begin();
+  std::ptrdiff_t IterOffset = It - AddressMap.begin();
   return std::pair{Psym, static_cast<size_t>(IterOffset)};
 }

>From d2bddebdf1fba47b7dcfdaf8beefe6dc53a47535 Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Tue, 9 Sep 2025 17:03:18 +0200
Subject: [PATCH 4/8] fix: use tuples

---
 llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index 8827bc4a5b329..73c00538d3f4c 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -120,7 +120,7 @@ PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
   // The address map is sorted by address, so we can use lower_bound to find the
   // position. Each element is an offset into the symbols for a public symbol.
   auto It = llvm::lower_bound(
-      AddressMap, std::pair(Segment, Offset),
+      AddressMap, std::tuple(Segment, Offset),
       [&](support::ulittle32_t Cur, auto Addr) {
         auto Sym = Symbols.readRecord(Cur.value());
         if (Sym.kind() != codeview::S_PUB32)
@@ -134,9 +134,7 @@ PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
           return false;
         }
 
-        if (Psym->Segment == Addr.first)
-          return Psym->Offset < Addr.second;
-        return Psym->Segment < Addr.first;
+        return std::tie(Psym->Segment, Psym->Offset) < Addr;
       });
 
   if (It == AddressMap.end())

>From e5ec29b647fbf7648f34e122d0d6157882b687d3 Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Tue, 9 Sep 2025 17:04:11 +0200
Subject: [PATCH 5/8] refactor: generate publics programmatically

---
 .../DebugInfo/PDB/Inputs/PublicSymbols.cpp    |  46 -----
 .../DebugInfo/PDB/Inputs/PublicSymbols.pdb    | Bin 53248 -> 0 bytes
 .../DebugInfo/PDB/PublicsStreamTest.cpp       | 195 +++++++++++++++---
 3 files changed, 171 insertions(+), 70 deletions(-)
 delete mode 100644 llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp
 delete mode 100644 llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb

diff --git a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp
deleted file mode 100644
index 0aeab04543caf..0000000000000
--- a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-// clang-format off
-
-// Compile with
-// cl /Z7 /GR- /GS- PublicSymbols.cpp -c /Gy
-// link .\PublicSymbols.obj /DEBUG /NODEFAULTLIB /out:PublicSymbols.exe /ENTRY:main /OPT:ICF
-// llvm-pdbutil pdb2yaml --publics-stream PublicSymbols.pdb > PublicSymbols.yaml
-// llvm-pdbutil yaml2pdb PublicSymbols.yaml
-// 
-// rm PublicSymbols.exe && rm PublicSymbols.obj && rm PublicSymbols.yaml
-
-int foobar(int i){ return i + 1; }
-// these should be merged with ICF
-int dup1(int i){ return i + 2; }
-int dup2(int i){ return i + 2; }
-int dup3(int i){ return i + 2; }
-
-class AClass {
-public:
-    void AMethod(int, char*) {}
-    static bool Something(char c) {
-        return c == ' ';
-    }
-};
-
-struct Base {
-    virtual ~Base() = default;
-};
-struct Derived : public Base {};
-struct Derived2 : public Base {};
-struct Derived3 : public Derived2, public Derived {};
-
-int AGlobal;
-
-void operator delete(void *,unsigned __int64) {}
-
-int main() {
-    foobar(1);
-    dup1(1);
-    dup2(1);
-    dup3(1);
-    AClass a;
-    a.AMethod(1, nullptr);
-    AClass::Something(' ');
-    Derived3 d3;
-    return AGlobal;
-}
diff --git a/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb b/llvm/unittests/DebugInfo/PDB/Inputs/PublicSymbols.pdb
deleted file mode 100644
index ffa3275d58d7b77cb6aad0b89ffb0dceeb1ad39e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 53248
zcmeI*PiP!v9LDk4G-}g75~WC*gN!1gR+?P{O#(t at la1M8A}MKUDnhcF#4K)hC7Y at T
z4>@@7(1HgK3LfIkgMx>8_n;SvptpL9;8AG>3m$6q`OUnuvzg6e2(%{g`@*yD?!3Qw
z_npsf4zrs~Cd;p_)atdxRe!W^^wcSTa{2{-;B+>7{77$M+EeTHybg88l?i*Sb|~t&
zMF0T=5I_I{1Q0*~0R#|00D*lX at HY(s1Q0*~0R#|0009ILKmY**4!nT3dEgI`14IA;
z1Q0*~0R#|0009ILK;VBbaBQFao&w86009ILKmY**5I_I{1Q0*~f!zz(2Ozy&eZSib
zo%V<RGz1Vp009ILKmY**5I_I{1Q0lI0`Kc$e($z9s`#^kKQ+Fl&jyxNo6-HNx^I2S
z^+TP}<L|Z6wm<FaK6Q^TyZ&U{yZW+ft^StlhurvAZeAO+eCy`>%k at 9I{x?_uqpSPE
z^$)rIKX!Fj-S#23{hZtWlY4x@&GVS68*_ce)#+i<0zbKL_q6X$ny>39M$M1!?6TkW
z{%iJxhwUb5J6pZ2T~_T{u;#6`8~fMoc45iX-g<ka*SXC!$$}hhu(c=}_qT3rdeqVO
z*rVr9=8t!>-<9c6o=~#N^GZ=UugobG<sIcc<pbpt<um1)^0l&|TvvWkZYsBx!&=1&
z#aEtH29<&`rM#jnDb{>tNhv5-l+TqvlpmBH)y!$RzpB5czFWDXTvwjbdw5X2U9_{J
ze58D%Y$~^vo=%NhwC{OkLs?bUl_`zW5I_I{1Q0*~0R#|0;NS>6>OE+u%VEoS!^7E;
zQhgx^&W+{s!OY8^_qZGPRS}LC7FNn{EzCc+P0duQrhl87^gK-^Uc8odH%~5F%Z2n>
zzKq9{wY1dOd6BI*=W4W;mYVcDO(l)BXp(TA{@_x6W@;=yGy7s77Oz(>>cZBc=b0Ud
z7JFtS2=}!CJJ*iJl7-s%Gj8AR^tcvzW+bT%G#=O5dFPh7nrnl=O4~oLO{<E=Yw3<@
zb}Rw5Lp!S at d(yq$bB&s0JqDPrTTj<H=(x_-Z||>T+tghgXqr$pZJ#@OVd5!Uk7IlX
z$1%Rc<B020$I-I3xGuG}=J&-RZ#X|*sl8gN1VMi>AHJ4mu^Cz9Cl^+iYV$#Uv{I_q
zqaTEsqK>kl*CBSh8P!hLmQ`G?zTT*w&d<JNg=+SkHm|Q6z31mIznKg4b}be)+&ZS8
z+_qynXg3!BEM}uQ7i&6<mBy&&J?x${2*;O8<*KcOh5!NxAb<b at 2q1s}0()8D=y$oy
zpe at i&MlEXgy}!N^RL>WHAqY1JAb<b at 2q1s}0tg_000Ic?a{)Wof4eJ}(btUL{mKK1
zeQ({|?6ZKlJ)%1&m5euGyM*n|jXO8&LAar at RU2&b))wu=EdmH2fB*srAb<b at 2q1s}
z0tnnQfwt%F_iP0`5dsJxfB*srAb<b at 2q1s}0tnn~0e=7gZVNL%0tg_000IagfB*sr
zAb<b@|GmJ6r*n;8+uukvyTD!FM7jFL+3I4=>#LTQ7wWnQo&HgvVtd&i3|Kp&bcOdk
z?>^VlA%Fk^2q1s}0tg_000IagfWV#+uv2$ymlV4WVBPiVyc0Ut_jRs6q1fO2A64S(
p1K57{F at T-!+s6R$>A&vM5I_I{1Q0*~0R#|0009ILKwysv`~zW}Izs>e

diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
index 0aa6a95001b7b..0b12285a16d89 100644
--- a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
+++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
@@ -7,43 +7,180 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
 #include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-#include "llvm/Testing/Support/SupportHelpers.h"
 
 #include "gtest/gtest.h"
 
 using namespace llvm;
 using namespace llvm::pdb;
 
-extern const char *TestMainArgv0;
+namespace {
+struct PublicSym {
+  llvm::StringRef Name;
+  uint16_t Segment;
+  uint32_t Offset;
+};
+
+class MockPublics {
+public:
+  MockPublics(size_t StreamSize, BumpPtrAllocator &Alloc,
+              msf::MSFBuilder Builder);
+  static Expected<std::unique_ptr<MockPublics>>
+  create(BumpPtrAllocator &Allocator, size_t StreamSize);
+
+  void addPublics(ArrayRef<PublicSym> Syms);
+  Error finish();
+
+  PublicsStream *publicsStream();
+  SymbolStream *symbolStream();
+
+private:
+  MutableBinaryByteStream Stream;
+
+  msf::MSFBuilder MsfBuilder;
+  std::optional<msf::MSFLayout> MsfLayout;
+
+  GSIStreamBuilder Gsi;
 
-static std::string getExePath() {
-  SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
-  llvm::sys::path::append(InputsDir, "PublicSymbols.pdb");
-  return std::string(InputsDir);
+  std::unique_ptr<PublicsStream> Publics;
+  std::unique_ptr<SymbolStream> Symbols;
+};
+
+MockPublics::MockPublics(size_t StreamSize, BumpPtrAllocator &Allocator,
+                         msf::MSFBuilder Builder)
+    : Stream({Allocator.Allocate<uint8_t>(StreamSize), StreamSize},
+             llvm::endianness::little),
+      MsfBuilder(std::move(Builder)), Gsi(this->MsfBuilder) {}
+
+Expected<std::unique_ptr<MockPublics>>
+MockPublics::create(BumpPtrAllocator &Allocator, size_t StreamSize) {
+  auto ExpectedMsf = msf::MSFBuilder::create(Allocator, 4096);
+  if (!ExpectedMsf)
+    return ExpectedMsf.takeError();
+  return std::make_unique<MockPublics>(StreamSize, Allocator,
+                                       std::move(*ExpectedMsf));
+}
+
+void MockPublics::addPublics(ArrayRef<PublicSym> Publics) {
+  std::vector<BulkPublic> Bulks;
+  for (const auto &Sym : Publics) {
+    BulkPublic BP;
+    BP.Name = Sym.Name.data();
+    BP.NameLen = Sym.Name.size();
+    BP.Offset = Sym.Offset;
+    BP.Segment = Sym.Segment;
+    Bulks.emplace_back(BP);
+  }
+  Gsi.addPublicSymbols(std::move(Bulks));
+}
+
+Error MockPublics::finish() {
+  auto Err = Gsi.finalizeMsfLayout();
+  if (Err)
+    return Err;
+
+  auto ExpectedLayout = MsfBuilder.generateLayout();
+  if (!ExpectedLayout)
+    return ExpectedLayout.takeError();
+  MsfLayout = std::move(*ExpectedLayout);
+
+  return Gsi.commit(*MsfLayout, Stream);
+}
+
+PublicsStream *MockPublics::publicsStream() {
+  if (!Publics) {
+    Publics = std::make_unique<PublicsStream>(
+        msf::MappedBlockStream::createIndexedStream(*MsfLayout, Stream,
+                                                    Gsi.getPublicsStreamIndex(),
+                                                    MsfBuilder.getAllocator()));
+  }
+  return Publics.get();
+}
+
+SymbolStream *MockPublics::symbolStream() {
+  if (!Symbols) {
+    Symbols = std::make_unique<SymbolStream>(
+        msf::MappedBlockStream::createIndexedStream(*MsfLayout, Stream,
+                                                    Gsi.getRecordStreamIndex(),
+                                                    MsfBuilder.getAllocator()));
+  }
+  return Symbols.get();
+}
+
+std::array GSymbols{
+    PublicSym{"??0Base@@QEAA@XZ", /*Segment=*/1, /*Offset=*/0},
+    PublicSym{"??0Derived@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32},
+    PublicSym{"??0Derived2@@QEAA@XZ", /*Segment=*/1, /*Offset=*/32},
+    PublicSym{"??0Derived3@@QEAA@XZ", /*Segment=*/1, /*Offset=*/80},
+    PublicSym{"??1Base@@UEAA@XZ", /*Segment=*/1, /*Offset=*/160},
+    PublicSym{"??1Derived@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176},
+    PublicSym{"??1Derived2@@UEAA@XZ", /*Segment=*/1, /*Offset=*/176},
+    PublicSym{"??1Derived3@@UEAA@XZ", /*Segment=*/1, /*Offset=*/208},
+    PublicSym{"??3@YAXPEAX_K@Z", /*Segment=*/1, /*Offset=*/256},
+    PublicSym{"??_EDerived3@@W7EAAPEAXI@Z", /*Segment=*/1, /*Offset=*/268},
+    PublicSym{"??_GBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288},
+    PublicSym{"??_EBase@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/288},
+    PublicSym{"??_EDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
+    PublicSym{"??_EDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
+    PublicSym{"??_GDerived@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
+    PublicSym{"??_GDerived2@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/352},
+    PublicSym{"??_EDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416},
+    PublicSym{"??_GDerived3@@UEAAPEAXI@Z", /*Segment=*/1, /*Offset=*/416},
+    PublicSym{"?AMethod@AClass@@QEAAXHPEAD@Z", /*Segment=*/1, /*Offset=*/480},
+    PublicSym{"?Something@AClass@@SA_ND@Z", /*Segment=*/1, /*Offset=*/496},
+    PublicSym{"?dup1@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
+    PublicSym{"?dup3@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
+    PublicSym{"?dup2@@YAHH@Z", /*Segment=*/1, /*Offset=*/544},
+    PublicSym{"?foobar@@YAHH@Z", /*Segment=*/1, /*Offset=*/560},
+    PublicSym{"main", /*Segment=*/1, /*Offset=*/576},
+    PublicSym{"??_7Base@@6B@", /*Segment=*/2, /*Offset=*/0},
+    PublicSym{"??_7Derived@@6B@", /*Segment=*/2, /*Offset=*/8},
+    PublicSym{"??_7Derived2@@6B@", /*Segment=*/2, /*Offset=*/8},
+    PublicSym{"??_7Derived3@@6BDerived2@@@", /*Segment=*/2, /*Offset=*/16},
+    PublicSym{"??_7Derived3@@6BDerived@@@", /*Segment=*/2, /*Offset=*/24},
+    PublicSym{"?AGlobal@@3HA", /*Segment=*/3, /*Offset=*/0},
+};
+
+} // namespace
+
+static std::pair<uint16_t, uint32_t>
+nthSymbolAddress(PublicsStream *Publics, SymbolStream *Symbols, size_t N) {
+  auto Index = Publics->getAddressMap()[N].value();
+  codeview::CVSymbol Sym = Symbols->readRecord(Index);
+  auto ExpectedPub =
+      codeview::SymbolDeserializer::deserializeAs<codeview::PublicSym32>(Sym);
+  if (!ExpectedPub)
+    return std::pair(0, 0);
+  return std::pair(ExpectedPub->Segment, ExpectedPub->Offset);
 }
 
 TEST(PublicsStreamTest, FindByAddress) {
-  std::string ExePath = getExePath();
-  auto Buffer = MemoryBuffer::getFile(ExePath, /*IsText=*/false,
-                                      /*RequiresNullTerminator=*/false);
-  ASSERT_TRUE(bool(Buffer));
-  auto Stream = std::make_unique<MemoryBufferByteStream>(
-      std::move(*Buffer), llvm::endianness::little);
-
-  BumpPtrAllocator Alloc;
-  PDBFile File(ExePath, std::move(Stream), Alloc);
-  ASSERT_FALSE(bool(File.parseFileHeaders()));
-  ASSERT_FALSE(bool(File.parseStreamData()));
-
-  auto Publics = File.getPDBPublicsStream();
-  ASSERT_TRUE(bool(Publics));
-  auto Symbols = File.getPDBSymbolStream();
-  ASSERT_TRUE(bool(Symbols));
+  BumpPtrAllocator Allocator;
+  auto ExpectedMock = MockPublics::create(Allocator, 1 << 20);
+  ASSERT_TRUE(bool(ExpectedMock));
+  std::unique_ptr<MockPublics> Mock = std::move(*ExpectedMock);
+
+  Mock->addPublics(GSymbols);
+  Error Err = Mock->finish();
+  ASSERT_FALSE(Err) << Err;
+
+  auto *Publics = Mock->publicsStream();
+  ASSERT_NE(Publics, nullptr);
+  Err = Publics->reload();
+  ASSERT_FALSE(Err) << Err;
+
+  auto *Symbols = Mock->symbolStream();
+  ASSERT_NE(Symbols, nullptr);
+  Err = Symbols->reload();
+  ASSERT_FALSE(Err) << Err;
 
   auto VTableDerived = Publics->findByAddress(*Symbols, 2, 8);
   ASSERT_TRUE(VTableDerived.has_value());
@@ -52,6 +189,16 @@ TEST(PublicsStreamTest, FindByAddress) {
   ASSERT_EQ(VTableDerived->first.Name, "??_7Derived2@@6B@");
   ASSERT_EQ(VTableDerived->second, 26u);
 
+  // Again, make sure that we find the first symbol
+  auto VectorDtorDerived = Publics->findByAddress(*Symbols, 1, 352);
+  ASSERT_TRUE(VectorDtorDerived.has_value());
+  ASSERT_EQ(VectorDtorDerived->first.Name, "??_EDerived2@@UEAAPEAXI@Z");
+  ASSERT_EQ(VectorDtorDerived->second, 12u);
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1, 352));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1, 352));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1, 352));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1, 416));
+
   ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value());
   ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());
 

>From c17d0cf3f4760b3440e561c960d29455fa32d5dc Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Tue, 9 Sep 2025 17:06:21 +0200
Subject: [PATCH 6/8] fix: use tuples in the last equality check

---
 llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index 73c00538d3f4c..0453eea26605b 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -100,18 +100,6 @@ Error PublicsStream::reload() {
   return Error::success();
 }
 
-static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffset,
-                                     uint16_t RhsSegment, uint32_t RhsOffset) {
-  if (LhsSegment == RhsSegment)
-    return LhsOffset - RhsOffset;
-  return LhsSegment - RhsSegment;
-}
-
-static uint32_t compareSegmentOffset(uint16_t LhsSegment, uint32_t LhsOffst,
-                                     const codeview::PublicSym32 &Rhs) {
-  return compareSegmentOffset(LhsSegment, LhsOffst, Rhs.Segment, Rhs.Offset);
-}
-
 // This is a reimplementation of NearestSym:
 // https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
 std::optional<std::pair<codeview::PublicSym32, size_t>>
@@ -152,8 +140,7 @@ PublicsStream::findByAddress(const SymbolStream &Symbols, uint16_t Segment,
   }
   codeview::PublicSym32 Psym = std::move(*MaybePsym);
 
-  uint32_t Cmp = compareSegmentOffset(Segment, Offset, Psym);
-  if (Cmp != 0)
+  if (std::tuple(Segment, Offset) != std::tuple(Psym.Segment, Psym.Offset))
     return std::nullopt;
 
   std::ptrdiff_t IterOffset = It - AddressMap.begin();

>From b27edebfafa82c69d5acb13291b7b933f4f279cc Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Tue, 9 Sep 2025 17:14:49 +0200
Subject: [PATCH 7/8] test: add check for corrupted debug info

---
 .../DebugInfo/PDB/PublicsStreamTest.cpp         | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
index 0b12285a16d89..e7d108eb10e1a 100644
--- a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
+++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
@@ -42,6 +42,8 @@ class MockPublics {
   PublicsStream *publicsStream();
   SymbolStream *symbolStream();
 
+  MutableBinaryByteStream &stream() { return Stream; }
+
 private:
   MutableBinaryByteStream Stream;
 
@@ -206,4 +208,19 @@ TEST(PublicsStreamTest, FindByAddress) {
   ASSERT_TRUE(GlobalSym.has_value());
   ASSERT_EQ(GlobalSym->first.Name, "?AGlobal@@3HA");
   ASSERT_EQ(GlobalSym->second, 30u);
+
+  // test corrupt debug info
+  codeview::CVSymbol GlobalCVSym =
+      Symbols->readRecord(Publics->getAddressMap()[30]);
+  ASSERT_EQ(GlobalCVSym.kind(), codeview::S_PUB32);
+  // CVSymbol::data returns a pointer to const data, so we modify the backing
+  // data
+  uint8_t *PDBData = Mock->stream().data().data();
+  auto Offset = GlobalCVSym.data().data() - PDBData;
+  reinterpret_cast<codeview::RecordPrefix *>(PDBData + Offset)->RecordKind =
+      codeview::S_GDATA32;
+  ASSERT_EQ(GlobalCVSym.kind(), codeview::S_GDATA32);
+
+  GlobalSym = Publics->findByAddress(*Symbols, 3, 0);
+  ASSERT_FALSE(GlobalSym.has_value());
 }

>From 413ebd125c46e942dc53f7de440ba35352c540f6 Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Tue, 9 Sep 2025 18:03:08 +0200
Subject: [PATCH 8/8] fix: comparison

---
 llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
index e7d108eb10e1a..4b89280cbdb93 100644
--- a/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
+++ b/llvm/unittests/DebugInfo/PDB/PublicsStreamTest.cpp
@@ -153,7 +153,7 @@ std::array GSymbols{
 
 } // namespace
 
-static std::pair<uint16_t, uint32_t>
+static std::pair<uint32_t, uint32_t>
 nthSymbolAddress(PublicsStream *Publics, SymbolStream *Symbols, size_t N) {
   auto Index = Publics->getAddressMap()[N].value();
   codeview::CVSymbol Sym = Symbols->readRecord(Index);
@@ -196,10 +196,10 @@ TEST(PublicsStreamTest, FindByAddress) {
   ASSERT_TRUE(VectorDtorDerived.has_value());
   ASSERT_EQ(VectorDtorDerived->first.Name, "??_EDerived2@@UEAAPEAXI@Z");
   ASSERT_EQ(VectorDtorDerived->second, 12u);
-  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1, 352));
-  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1, 352));
-  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1, 352));
-  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1, 416));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 13), std::pair(1u, 352u));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 14), std::pair(1u, 352u));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 15), std::pair(1u, 352u));
+  ASSERT_EQ(nthSymbolAddress(Publics, Symbols, 16), std::pair(1u, 416u));
 
   ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 7).has_value());
   ASSERT_FALSE(Publics->findByAddress(*Symbols, 2, 9).has_value());



More information about the llvm-commits mailing list