[llvm] [DebugInfo][DWARF] Utilize DW_AT_LLVM_stmt_sequence attr in line table lookups (PR #123391)

David Blaikie via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 15:09:03 PST 2025


================
@@ -1364,66 +1370,111 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,
 
 bool DWARFDebugLine::LineTable::lookupAddressRange(
     object::SectionedAddress Address, uint64_t Size,
-    std::vector<uint32_t> &Result) const {
+    std::vector<uint32_t> &Result,
+    std::optional<uint64_t> StmtSequenceOffset) const {
 
   // Search for relocatable addresses
-  if (lookupAddressRangeImpl(Address, Size, Result))
+  if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
     return true;
 
   if (Address.SectionIndex == object::SectionedAddress::UndefSection)
     return false;
 
   // Search for absolute addresses
   Address.SectionIndex = object::SectionedAddress::UndefSection;
-  return lookupAddressRangeImpl(Address, Size, Result);
+  return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset);
 }
 
 bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
     object::SectionedAddress Address, uint64_t Size,
-    std::vector<uint32_t> &Result) const {
+    std::vector<uint32_t> &Result,
+    std::optional<uint64_t> StmtSequenceOffset) const {
   if (Sequences.empty())
     return false;
-  uint64_t EndAddr = Address.Address + Size;
-  // First, find an instruction sequence containing the given address.
-  DWARFDebugLine::Sequence Sequence;
-  Sequence.SectionIndex = Address.SectionIndex;
-  Sequence.HighPC = Address.Address;
-  SequenceIter LastSeq = Sequences.end();
-  SequenceIter SeqPos = llvm::upper_bound(
-      Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC);
-  if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
-    return false;
 
-  SequenceIter StartPos = SeqPos;
+  const uint64_t EndAddr = Address.Address + Size;
+
+  // Helper: Given a sequence and a starting address, find the subset
+  // of rows within [Address, EndAddr) and append them to `Result`.
+  auto addRowsFromSequence = [&](const Sequence &Seq,
+                                 object::SectionedAddress StartAddr,
+                                 uint64_t EndAddress, bool IsFirstSequence) {
+    // If this sequence does not intersect our [StartAddr, EndAddress) range,
+    // do nothing.
+    if (Seq.HighPC <= StartAddr.Address || Seq.LowPC >= EndAddress)
+      return;
 
-  // Add the rows from the first sequence to the vector, starting with the
-  // index we just calculated
+    // For the "first" sequence in the search, we must figure out which row
+    // actually starts within our address range.
+    uint32_t FirstRowIndex = Seq.FirstRowIndex;
+    if (IsFirstSequence)
+      FirstRowIndex = findRowInSeq(Seq, StartAddr);
 
-  while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
-    const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
-    // For the first sequence, we need to find which row in the sequence is the
-    // first in our range.
-    uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
-    if (SeqPos == StartPos)
-      FirstRowIndex = findRowInSeq(CurSeq, Address);
-
-    // Figure out the last row in the range.
+    // Similarly, compute the last row that is within [StartAddr, EndAddress).
     uint32_t LastRowIndex =
-        findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex});
+        findRowInSeq(Seq, {EndAddress - 1, StartAddr.SectionIndex});
     if (LastRowIndex == UnknownRowIndex)
-      LastRowIndex = CurSeq.LastRowIndex - 1;
+      LastRowIndex = Seq.LastRowIndex - 1;
 
     assert(FirstRowIndex != UnknownRowIndex);
     assert(LastRowIndex != UnknownRowIndex);
 
+    // Append all row indices in [FirstRowIndex, LastRowIndex].
     for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) {
       Result.push_back(I);
     }
+  };
 
+  // If a stmt_sequence_offset was provided, do a binary search to find the
+  // single sequence that matches that offset. Then add only its relevant rows.
+  if (StmtSequenceOffset) {
+    // Binary-search the Sequences by their StmtSeqOffset:
+    auto It = llvm::lower_bound(Sequences, *StmtSequenceOffset,
+                                [](const Sequence &Seq, uint64_t OffsetVal) {
+                                  return Seq.StmtSeqOffset < OffsetVal;
+                                });
+
+    // If not found or mismatched, there’s no match to return.
+    if (It == Sequences.end() || It->StmtSeqOffset != *StmtSequenceOffset)
+      return false;
+
+    // Now add rows from the discovered sequence if it intersects [Address,
+    // EndAddr).
+    addRowsFromSequence(*It, Address, EndAddr, /*IsFirstSequence=*/true);
+    return !Result.empty();
+  }
+
+  // Otherwise, fall back to logic of walking sequences by Address.
+  // We first find a sequence containing `Address` (via upper_bound on HighPC),
+  // then proceed forward through overlapping sequences in ascending order.
+
+  // Construct a dummy Sequence to find where `Address` fits by HighPC.
+  DWARFDebugLine::Sequence SearchSeq;
+  SearchSeq.SectionIndex = Address.SectionIndex;
+  SearchSeq.HighPC = Address.Address;
+
+  auto LastSeq = Sequences.end();
+  auto SeqPos = llvm::upper_bound(Sequences, SearchSeq,
+                                  DWARFDebugLine::Sequence::orderByHighPC);
+  if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
+    return false;
+
+  // This marks the first sequence we found that might contain Address.
+  const auto StartPos = SeqPos;
+
+  // Walk forward until sequences no longer intersect [Address, EndAddr).
+  while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
+    // Add rows only if the sequence is in the same section:
+    if (SeqPos->SectionIndex == Address.SectionIndex) {
----------------
dwblaikie wrote:

Yep, this all seems more complex than I had in mind.

Oen question on this line in particular, is where did this SectionIndex comparison come from? I don't see it in the original code?

But also, more generally, what I had in mind was that this loop `(SeqPos != LastSeq && ... ` could be made multi-purpose. If the SeqPos and LastPos were either initialized as they are in the code today, /or/ based on the `StmtSequenceOffset` if it's specified (look up the then SeqPos would be initialized to the sequence pointed to by the StmtSequenceOffset, and LastSeq would be initialized to SeqPos + 1)

Then this feels like it should be a very narrow change - the code mostly remains the same, except for the SeqPos/LastPos initialization?

https://github.com/llvm/llvm-project/pull/123391


More information about the llvm-commits mailing list