[llvm] [BOLT] Fix possibly incorrect CU-indices in gdb-index (PR #151927)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 5 01:47:41 PDT 2025


https://github.com/itrofimow updated https://github.com/llvm/llvm-project/pull/151927

>From 78ce26965bb15a197d73a1d7ce7d5fbfb37c70da Mon Sep 17 00:00:00 2001
From: Ivan Trofimov <i.trofimow at yandex.ru>
Date: Mon, 4 Aug 2025 12:07:11 +0300
Subject: [PATCH 1/4] [BOLT] Fix possibly incorrect CU-indices in gdb-index

---
 bolt/lib/Core/GDBIndex.cpp | 75 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp
index c7fb4889646b4..0fe1c5de94138 100644
--- a/bolt/lib/Core/GDBIndex.cpp
+++ b/bolt/lib/Core/GDBIndex.cpp
@@ -130,6 +130,26 @@ void GDBIndex::updateGdbIndexSection(
             [](const MapEntry &E1, const MapEntry &E2) -> bool {
               return E1.second.Offset < E2.second.Offset;
             });
+  // Create the original CU index -> updated CU index mapping,
+  // as the sort above could've changed the order and we have to update
+  // indexes correspondingly in address map and constant pool.
+  std::unordered_map<uint32_t, uint32_t> OriginalCUIndexToUpdatedCUIndexMap;
+  OriginalCUIndexToUpdatedCUIndexMap.reserve(CUVector.size());
+  for (uint32_t I = 0; I < CUVector.size(); ++I) {
+    OriginalCUIndexToUpdatedCUIndexMap[OffsetToIndexMap.at(CUVector[I].first)] =
+        I;
+  }
+  const auto RemapCUIndex =
+      [&OriginalCUIndexToUpdatedCUIndexMap](uint32_t OriginalIndex) {
+        const auto it = OriginalCUIndexToUpdatedCUIndexMap.find(OriginalIndex);
+        if (it == OriginalCUIndexToUpdatedCUIndexMap.end()) {
+          errs() << "BOLT-ERROR: .gdb_index unknown CU index\n";
+          exit(1);
+        }
+
+        return it->second;
+      };
+
   // Writing out CU List <Offset, Size>
   for (auto &CUInfo : CUVector) {
     // Skipping TU for DWARF5 when they are not included in CU list.
@@ -160,12 +180,13 @@ void GDBIndex::updateGdbIndexSection(
   // Generate new address table.
   for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
        ARangesSectionWriter.getCUAddressRanges()) {
-    const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
+    const uint32_t OriginalCUIndex = OffsetToIndexMap[CURangesPair.first];
+    const uint32_t UpdatedCUIndex = RemapCUIndex(OriginalCUIndex);
     const DebugAddressRangesVector &Ranges = CURangesPair.second;
     for (const DebugAddressRange &Range : Ranges) {
       write64le(Buffer, Range.LowPC);
       write64le(Buffer + 8, Range.HighPC);
-      write32le(Buffer + 16, CUIndex);
+      write32le(Buffer + 16, UpdatedCUIndex);
       Buffer += 20;
     }
   }
@@ -178,6 +199,56 @@ void GDBIndex::updateGdbIndexSection(
   // Copy over the rest of the original data.
   memcpy(Buffer, Data, TrailingSize);
 
+  // Fixup CU-indicies in constant pool.
+  const char *const OriginalConstantPoolData =
+      GdbIndexContents.data() + ConstantPoolOffset;
+  uint8_t *const UpdatedConstantPoolData =
+      NewGdbIndexContents + ConstantPoolOffset + Delta;
+
+  const char *OriginalSymbolTableData =
+      GdbIndexContents.data() + SymbolTableOffset;
+  std::set<uint32_t> CUVectorOffsets;
+  // Parse the symbol map and extract constant pool CU offsets from it.
+  while (OriginalSymbolTableData < OriginalConstantPoolData) {
+    const uint32_t NameOffset = read32le(OriginalSymbolTableData);
+    const uint32_t CUVectorOffset = read32le(OriginalSymbolTableData + 4);
+    OriginalSymbolTableData += 8;
+
+    // Iff both are zero, then the slot is considered empty in the hash-map.
+    if (NameOffset || CUVectorOffset) {
+      CUVectorOffsets.insert(CUVectorOffset);
+    }
+  }
+
+  // Update the CU-indices in the constant pool.
+  for (const auto CUVectorOffset : CUVectorOffsets) {
+    const char *CurrentOriginalConstantPoolData =
+        OriginalConstantPoolData + CUVectorOffset;
+    uint8_t *CurrentUpdatedConstantPoolData =
+        UpdatedConstantPoolData + CUVectorOffset;
+
+    const uint32_t Num = read32le(CurrentOriginalConstantPoolData);
+    CurrentOriginalConstantPoolData += 4;
+    CurrentUpdatedConstantPoolData += 4;
+
+    for (uint32_t J = 0; J < Num; ++J) {
+      const uint32_t OriginalCUIndexAndAttributes =
+          read32le(CurrentOriginalConstantPoolData);
+      CurrentOriginalConstantPoolData += 4;
+
+      // We only care about the index, which is the lowest 24 bits; the other
+      // bits are left as is.
+      const uint32_t OriginalCUIndex =
+          OriginalCUIndexAndAttributes & ((1 << 24) - 1);
+      const uint32_t Attributes = OriginalCUIndexAndAttributes >> 24;
+      const uint32_t UpdatedCUIndexAndAttributes =
+          RemapCUIndex(OriginalCUIndex) | (Attributes << 24);
+
+      write32le(CurrentUpdatedConstantPoolData, UpdatedCUIndexAndAttributes);
+      CurrentUpdatedConstantPoolData += 4;
+    }
+  }
+
   // Register the new section.
   BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
                                  NewGdbIndexSize);

>From e7880538e7911cfe2d502f1628d4f48ce9080467 Mon Sep 17 00:00:00 2001
From: Ivan Trofimov <i.trofimow at yandex.ru>
Date: Mon, 4 Aug 2025 12:17:12 +0300
Subject: [PATCH 2/4] typo fixes

---
 bolt/lib/Core/GDBIndex.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp
index 0fe1c5de94138..fbbbac1ee910f 100644
--- a/bolt/lib/Core/GDBIndex.cpp
+++ b/bolt/lib/Core/GDBIndex.cpp
@@ -132,7 +132,7 @@ void GDBIndex::updateGdbIndexSection(
             });
   // Create the original CU index -> updated CU index mapping,
   // as the sort above could've changed the order and we have to update
-  // indexes correspondingly in address map and constant pool.
+  // indices correspondingly in address map and constant pool.
   std::unordered_map<uint32_t, uint32_t> OriginalCUIndexToUpdatedCUIndexMap;
   OriginalCUIndexToUpdatedCUIndexMap.reserve(CUVector.size());
   for (uint32_t I = 0; I < CUVector.size(); ++I) {
@@ -199,7 +199,7 @@ void GDBIndex::updateGdbIndexSection(
   // Copy over the rest of the original data.
   memcpy(Buffer, Data, TrailingSize);
 
-  // Fixup CU-indicies in constant pool.
+  // Fixup CU-indices in constant pool.
   const char *const OriginalConstantPoolData =
       GdbIndexContents.data() + ConstantPoolOffset;
   uint8_t *const UpdatedConstantPoolData =

>From 82a7610e46daec7e2f23b047875af14256d77057 Mon Sep 17 00:00:00 2001
From: Ivan Trofimov <i.trofimow at yandex.ru>
Date: Mon, 4 Aug 2025 20:45:11 +0300
Subject: [PATCH 3/4] fix the remapping logic for skipped CUs and indices into
 CU TU List

---
 bolt/lib/Core/GDBIndex.cpp | 40 ++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp
index fbbbac1ee910f..95d016292df86 100644
--- a/bolt/lib/Core/GDBIndex.cpp
+++ b/bolt/lib/Core/GDBIndex.cpp
@@ -125,6 +125,14 @@ void GDBIndex::updateGdbIndexSection(
 
   using MapEntry = std::pair<uint32_t, CUInfo>;
   std::vector<MapEntry> CUVector(CUMap.begin(), CUMap.end());
+  // Remove the CUs we won't emit anyway.
+  CUVector.erase(std::remove_if(CUVector.begin(), CUVector.end(),
+                                [&OriginalOffsets](const MapEntry &It) {
+                                  // Skipping TU for DWARF5 when they are not
+                                  // included in CU list.
+                                  return OriginalOffsets.count(It.first) == 0;
+                                }),
+                 CUVector.end());
   // Need to sort since we write out all of TUs in .debug_info before CUs.
   std::sort(CUVector.begin(), CUVector.end(),
             [](const MapEntry &E1, const MapEntry &E2) -> bool {
@@ -139,22 +147,30 @@ void GDBIndex::updateGdbIndexSection(
     OriginalCUIndexToUpdatedCUIndexMap[OffsetToIndexMap.at(CUVector[I].first)] =
         I;
   }
-  const auto RemapCUIndex =
-      [&OriginalCUIndexToUpdatedCUIndexMap](uint32_t OriginalIndex) {
-        const auto it = OriginalCUIndexToUpdatedCUIndexMap.find(OriginalIndex);
-        if (it == OriginalCUIndexToUpdatedCUIndexMap.end()) {
-          errs() << "BOLT-ERROR: .gdb_index unknown CU index\n";
-          exit(1);
-        }
+  const auto RemapCUIndex = [&OriginalCUIndexToUpdatedCUIndexMap,
+                             CUVectorSize = CUVector.size(),
+                             TUVectorSize = getGDBIndexTUEntryVector().size()](
+                                uint32_t OriginalIndex) {
+    if (OriginalIndex >= CUVectorSize) {
+      if (OriginalIndex >= CUVectorSize + TUVectorSize) {
+        errs() << "BOLT-ERROR: .gdb_index unknown CU index\n";
+        exit(1);
+      }
+      // The index is into TU CU List, which we don't reorder, so return as is.
+      return OriginalIndex;
+    }
 
-        return it->second;
-      };
+    const auto It = OriginalCUIndexToUpdatedCUIndexMap.find(OriginalIndex);
+    if (It == OriginalCUIndexToUpdatedCUIndexMap.end()) {
+      errs() << "BOLT-ERROR: .gdb_index unknown CU index\n";
+      exit(1);
+    }
+
+    return It->second;
+  };
 
   // Writing out CU List <Offset, Size>
   for (auto &CUInfo : CUVector) {
-    // Skipping TU for DWARF5 when they are not included in CU list.
-    if (!OriginalOffsets.count(CUInfo.first))
-      continue;
     write64le(Buffer, CUInfo.second.Offset);
     // Length encoded in CU doesn't contain first 4 bytes that encode length.
     write64le(Buffer + 8, CUInfo.second.Length + 4);

>From c9cb15e738ca86c90d25e259446ae948627554d9 Mon Sep 17 00:00:00 2001
From: Ivan Trofimov <i.trofimow at yandex.ru>
Date: Tue, 5 Aug 2025 11:46:58 +0300
Subject: [PATCH 4/4] fix calculation of OffsetToIndexMap

---
 bolt/lib/Core/GDBIndex.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp
index 95d016292df86..7ccbd2e50db68 100644
--- a/bolt/lib/Core/GDBIndex.cpp
+++ b/bolt/lib/Core/GDBIndex.cpp
@@ -77,7 +77,8 @@ void GDBIndex::updateGdbIndexSection(
     exit(1);
   }
   DenseSet<uint64_t> OriginalOffsets;
-  for (unsigned Index = 0, Units = BC.DwCtx->getNumCompileUnits();
+  for (unsigned Index = 0, PresentUnitsIndex = 0,
+                Units = BC.DwCtx->getNumCompileUnits();
        Index < Units; ++Index) {
     const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
     if (SkipTypeUnits && CU->isTypeUnit())
@@ -90,7 +91,7 @@ void GDBIndex::updateGdbIndexSection(
     }
 
     OriginalOffsets.insert(Offset);
-    OffsetToIndexMap[Offset] = Index;
+    OffsetToIndexMap[Offset] = PresentUnitsIndex++;
   }
 
   // Ignore old address table.



More information about the llvm-commits mailing list