[llvm] 464f65a - [DebugInfo][DWARF] Utilize DW_AT_LLVM_stmt_sequence attr in line table lookups (#123391)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 6 15:41:33 PST 2025


Author: alx32
Date: 2025-02-06T15:41:29-08:00
New Revision: 464f65adacd812aead775308ab7f1952fee774db

URL: https://github.com/llvm/llvm-project/commit/464f65adacd812aead775308ab7f1952fee774db
DIFF: https://github.com/llvm/llvm-project/commit/464f65adacd812aead775308ab7f1952fee774db.diff

LOG: [DebugInfo][DWARF] Utilize DW_AT_LLVM_stmt_sequence attr in line table lookups (#123391)

**Summary**
Add support for filtering line table entries based on
`DW_AT_LLVM_stmt_sequence` attribute when looking up address ranges.
This ensures that line entries are correctly attributed to their
corresponding functions, even when multiple functions share the same
address range due to optimizations.

**Background**
In https://github.com/llvm/llvm-project/pull/110192 we added support to
clang to generate the `DW_AT_LLVM_stmt_sequence` attribute for
`DW_TAG_subprogram`'s. Corresponding RFC: [New DWARF Attribute for
Symbolication of Merged
Functions](https://discourse.llvm.org/t/rfc-new-dwarf-attribute-for-symbolication-of-merged-functions/79434)

The `DW_AT_LLVM_stmt_sequence` attribute allows accurate attribution of
line number information to their corresponding functions, even in
scenarios where functions are merged or share the same address space due
to optimizations like Identical Code Folding (ICF) in the linker.

**Implementation Details**
The patch modifies `DWARFDebugLine::LineTable::lookupAddressRange` to
accept an optional `StmtSequenceOffset` parameter. It is intended to carry
the value of a `DIE`'s `DW_AT_LLVM_stmt_sequence` attribute. This attribute
contains an offset into the line table that marks where the line entries
for this DIE's function begin.

If the attribute is present, the function filters the results to only
include line entries from the sequence that starts at the specified
offset. This ensures that even when multiple functions share the same
address range, we return only the line entries that actually belong to
the function represented by the DIE.

The implementation:
- Adds an optional `StmtSequenceOffset` parameter to lookupAddressRange
- Records the line table offset at which each parsed sequence begins
- Modifies the address range lookup logic to filter sequences based on
their offset
- Returns only line entries from the matching sequence

Added: 
    

Modified: 
    llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
    llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
    llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index ff7bf87d8e6b5a8..61f17a27b3d286c 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -209,6 +209,9 @@ class DWARFDebugLine {
     unsigned LastRowIndex;
     bool Empty;
 
+    /// The offset into the line table where this sequence begins
+    uint64_t StmtSeqOffset = UINT64_MAX;
+
     void reset();
 
     static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
@@ -243,8 +246,20 @@ class DWARFDebugLine {
     uint32_t lookupAddress(object::SectionedAddress Address,
                            bool *IsApproximateLine = nullptr) const;
 
-    bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
-                            std::vector<uint32_t> &Result) const;
+    /// Fills the Result argument with the indices of the rows that correspond
+    /// to the address range specified by \p Address and \p Size.
+    ///
+    /// \param Address - The starting address of the range.
+    /// \param Size - The size of the address range.
+    /// \param Result - The vector to fill with row indices.
+    /// \param StmtSequenceOffset - if provided, only rows from the sequence
+    /// starting at the matching offset will be added to the result.
+    ///
+    /// Returns true if any rows were found.
+    bool lookupAddressRange(
+        object::SectionedAddress Address, uint64_t Size,
+        std::vector<uint32_t> &Result,
+        std::optional<uint64_t> StmtSequenceOffset = std::nullopt) const;
 
     bool hasFileAtIndex(uint64_t FileIndex) const {
       return Prologue.hasFileAtIndex(FileIndex);
@@ -305,8 +320,20 @@ class DWARFDebugLine {
     uint32_t lookupAddressImpl(object::SectionedAddress Address,
                                bool *IsApproximateLine = nullptr) const;
 
-    bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
-                                std::vector<uint32_t> &Result) const;
+    /// Fills the Result argument with the indices of the rows that correspond
+    /// to the address range specified by \p Address and \p Size.
+    ///
+    /// \param Address - The starting address of the range.
+    /// \param Size - The size of the address range.
+    /// \param Result - The vector to fill with row indices.
+    /// \param StmtSequenceOffset - if provided, only rows from the sequence
+    /// starting at the matching offset will be added to the result.
+    ///
+    /// Returns true if any rows were found.
+    bool
+    lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
+                           std::vector<uint32_t> &Result,
+                           std::optional<uint64_t> StmtSequenceOffset) const;
   };
 
   const LineTable *getLineTable(uint64_t Offset) const;
@@ -376,7 +403,7 @@ class DWARFDebugLine {
     ParsingState(struct LineTable *LT, uint64_t TableOffset,
                  function_ref<void(Error)> ErrorHandler);
 
-    void resetRowAndSequence();
+    void resetRowAndSequence(uint64_t Offset);
     void appendRowToMatrix();
 
     struct AddrOpIndexDelta {

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index a30ae163fa5a989..adcd0aa32942010 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -531,6 +531,7 @@ void DWARFDebugLine::Sequence::reset() {
   FirstRowIndex = 0;
   LastRowIndex = 0;
   Empty = true;
+  StmtSeqOffset = UINT64_MAX;
 }
 
 DWARFDebugLine::LineTable::LineTable() { clear(); }
@@ -561,13 +562,12 @@ void DWARFDebugLine::LineTable::clear() {
 DWARFDebugLine::ParsingState::ParsingState(
     struct LineTable *LT, uint64_t TableOffset,
     function_ref<void(Error)> ErrorHandler)
-    : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {
-  resetRowAndSequence();
-}
+    : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {}
 
-void DWARFDebugLine::ParsingState::resetRowAndSequence() {
+void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
   Row.reset(LineTable->Prologue.DefaultIsStmt);
   Sequence.reset();
+  Sequence.StmtSeqOffset = Offset;
 }
 
 void DWARFDebugLine::ParsingState::appendRowToMatrix() {
@@ -848,6 +848,10 @@ Error DWARFDebugLine::LineTable::parse(
     *OS << '\n';
     Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
   }
+  // *OffsetPtr points to the end of the prologue - i.e. the start of the first
+  // sequence. So initialize the first sequence offset accordingly.
+  State.resetRowAndSequence(*OffsetPtr);
+
   bool TombstonedAddress = false;
   auto EmitRow = [&] {
     if (!TombstonedAddress) {
@@ -912,7 +916,9 @@ Error DWARFDebugLine::LineTable::parse(
         // into this code path - if it were invalid, the default case would be
         // followed.
         EmitRow();
-        State.resetRowAndSequence();
+        // Cursor now points to right after the end_sequence opcode - so points
+        // to the start of the next sequence - if one exists.
+        State.resetRowAndSequence(Cursor.tell());
         break;
 
       case DW_LNE_set_address:
@@ -1364,10 +1370,11 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,
 
 bool DWARFDebugLine::LineTable::lookupAddressRange(
     object::SectionedAddress Address, uint64_t Size,
-    std::vector<uint32_t> &Result) const {
+    std::vector<uint32_t> &Result,
+    std::optional<uint64_t> StmtSequenceOffset) const {
 
   // Search for relocatable addresses
-  if (lookupAddressRangeImpl(Address, Size, Result))
+  if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
     return true;
 
   if (Address.SectionIndex == object::SectionedAddress::UndefSection)
@@ -1375,12 +1382,13 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
 
   // Search for absolute addresses
   Address.SectionIndex = object::SectionedAddress::UndefSection;
-  return lookupAddressRangeImpl(Address, Size, Result);
+  return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset);
 }
 
 bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
     object::SectionedAddress Address, uint64_t Size,
-    std::vector<uint32_t> &Result) const {
+    std::vector<uint32_t> &Result,
+    std::optional<uint64_t> StmtSequenceOffset) const {
   if (Sequences.empty())
     return false;
   uint64_t EndAddr = Address.Address + Size;
@@ -1389,16 +1397,38 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
   Sequence.SectionIndex = Address.SectionIndex;
   Sequence.HighPC = Address.Address;
   SequenceIter LastSeq = Sequences.end();
-  SequenceIter SeqPos = llvm::upper_bound(
-      Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC);
-  if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
+  SequenceIter SeqPos;
+
+  if (StmtSequenceOffset) {
+    // If we have a statement sequence offset, find the specific sequence.
+    // Linear search for sequence with matching StmtSeqOffset
+    SeqPos = std::find_if(Sequences.begin(), LastSeq,
+                          [&](const DWARFDebugLine::Sequence &S) {
+                            return S.StmtSeqOffset == *StmtSequenceOffset;
+                          });
+
+    // If sequence not found, return false
+    if (SeqPos == LastSeq)
+      return false;
+
+    // Set LastSeq to the next sequence since we only want the one matching
+    // sequence (sequences are guaranteed to have unique StmtSeqOffset)
+    LastSeq = SeqPos + 1;
+  } else {
+    // No specific sequence requested, find first sequence containing address
+    SeqPos = std::upper_bound(Sequences.begin(), LastSeq, Sequence,
+                              DWARFDebugLine::Sequence::orderByHighPC);
+    if (SeqPos == LastSeq)
+      return false;
+  }
+
+  // If the start sequence doesn't contain the address, nothing to do
+  if (!SeqPos->containsPC(Address))
     return false;
 
   SequenceIter StartPos = SeqPos;
 
-  // Add the rows from the first sequence to the vector, starting with the
-  // index we just calculated
-
+  // Process sequences that overlap with the desired range
   while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
     const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
     // For the first sequence, we need to find which row in the sequence is the

diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index e549128031744e6..2fe52600df923bf 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "DwarfGenerator.h"
 #include "DwarfUtils.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Testing/Support/Error.h"
 #include "gtest/gtest.h"
@@ -2035,4 +2035,117 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
   EXPECT_THAT(Result.c_str(), MatchesRegex("a dir.b dir.b file"));
 }
 
+/// Test that lookupAddressRange correctly filters rows based on
+/// a statement-sequence offset (simulating DW_AT_LLVM_stmt_sequence).
+///
+/// This test verifies that:
+/// 1. When a statement-sequence offset is provided, lookupAddressRange
+///    only returns rows from the sequence starting at that offset.
+/// 2. When an invalid statement-sequence offset is provided, no rows
+///    are returned.
+/// 3. When no statement-sequence offset is provided, all matching rows
+///    in the table are returned.
+///
+/// We build a line table with two sequences at the same address range
+/// but different line numbers. Then we try lookups with various statement-
+/// sequence offsets to check the filtering logic.
+TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
+  if (!setupGenerator())
+    GTEST_SKIP();
+
+  // Create a line table that has two sequences covering [0x1000, 0x1004).
+  // Each sequence has two rows: addresses at 0x1000 and 0x1004, but
+  // they differ by line numbers (100 vs. 200, etc.).
+  //
+  // We'll pretend the first sequence starts at offset 0x2e in the line table,
+  // the second at 0x42, and we'll also test an invalid offset 0x66.
+
+  LineTable &LT = Gen->addLineTable();
+
+  // First sequence at offset 0x2e: addresses 0x1000(Ln=100), 0x1004(Ln=101)
+  LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
+  LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+  // Advance the line register by 99 (so line=100) and copy.
+  LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
+  LT.addStandardOpcode(DW_LNS_copy, {});
+  // 0x4b is a special opcode: address += 4, line += 1 (so line=101).
+  LT.addByte(0x4b);
+  // End this sequence.
+  LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
+
+  // Second sequence at offset 0x42: addresses 0x1000(Ln=200), 0x1004(Ln=201)
+  LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
+  LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+  LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
+  LT.addStandardOpcode(DW_LNS_copy, {});
+  // 0x4b again: address += 4, line += 1 (so line=201).
+  LT.addByte(0x4b);
+  // End this second sequence.
+  LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
+
+  // Generate the DWARF data.
+  generate();
+
+  // Parse the line table.
+  auto ExpectedLineTable =
+      Line.getOrParseLineTable(LineData, /*Offset=*/0, *Context,
+                               /*DwarfUnit=*/nullptr, RecordRecoverable);
+  ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
+  const auto *Table = *ExpectedLineTable;
+
+  // The table should have two sequences, each starting at our chosen offsets.
+  ASSERT_EQ(Table->Sequences.size(), 2u);
+
+  // 1) Try looking up with an invalid offset (simulating an invalid
+  //    DW_AT_LLVM_stmt_sequence). We expect no rows.
+  {
+    std::vector<uint32_t> Rows;
+    bool Found = Table->lookupAddressRange(
+        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+        /*StmtSequenceOffset=*/0x66); // invalid offset
+    EXPECT_FALSE(Found);
+    EXPECT_TRUE(Rows.empty());
+  }
+
+  // 2) Look up using the offset 0x2e (our first sequence). We expect
+  //    to get the rows from that sequence only (which for 0x1000 is row #0).
+  {
+    std::vector<uint32_t> Rows;
+    bool Found = Table->lookupAddressRange(
+        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+        /*StmtSequenceOffset=*/0x2e);
+    EXPECT_TRUE(Found);
+    ASSERT_EQ(Rows.size(), 1u);
+    // The first sequence's first row is index 0.
+    EXPECT_EQ(Rows[0], 0u);
+  }
+
+  // 3) Look up using the offset 0x42 (second sequence). For address 0x1000
+  //    in that second sequence, we should see row #3.
+  {
+    std::vector<uint32_t> Rows;
+    bool Found = Table->lookupAddressRange(
+        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+        /*StmtSequenceOffset=*/0x42);
+    EXPECT_TRUE(Found);
+    ASSERT_EQ(Rows.size(), 1u);
+    // The second sequence's first row is index 3 in the table.
+    EXPECT_EQ(Rows[0], 3u);
+  }
+
+  // 4) Look up with no statement-sequence offset specified.
+  //    We should get rows from both sequences for address 0x1000.
+  {
+    std::vector<uint32_t> Rows;
+    bool Found = Table->lookupAddressRange(
+        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+        std::nullopt /* no filter */);
+    EXPECT_TRUE(Found);
+    // The first sequence's row is #0, second's row is #3, so both should
+    // appear.
+    ASSERT_EQ(Rows.size(), 2u);
+    EXPECT_EQ(Rows[0], 0u);
+    EXPECT_EQ(Rows[1], 3u);
+  }
+}
 } // end anonymous namespace


        


More information about the llvm-commits mailing list