[llvm] [DebugInfo][DWARF] Utilize DW_AT_LLVM_stmt_sequence attr in line table lookups (PR #123391)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 16:05:34 PST 2025


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/123391

>From 658a02c24f452dd70df35e44444c90a696cc3f65 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Fri, 17 Jan 2025 11:33:20 -0800
Subject: [PATCH 1/7] [DebugInfo] Add option to use DW_AT_LLVM_stmt_sequence in
 line table lookups

---
 .../llvm/DebugInfo/DWARF/DWARFDebugLine.h     |  43 +++++-
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp   |  37 ++++-
 .../DebugInfo/DWARF/DWARFDebugLineTest.cpp    | 133 +++++++++++++++++-
 3 files changed, 201 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index ff7bf87d8e6b5a8..732a4bfc28ab3a3 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -209,6 +209,9 @@ class DWARFDebugLine {
     unsigned LastRowIndex;
     bool Empty;
 
+    /// The offset into the line table where this sequence begins
+    uint64_t Offset = 0;
+
     void reset();
 
     static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
@@ -243,8 +246,20 @@ class DWARFDebugLine {
     uint32_t lookupAddress(object::SectionedAddress Address,
                            bool *IsApproximateLine = nullptr) const;
 
+    /// Fills the Result argument with the indices of the rows that correspond
+    /// to the address range specified by \p Address and \p Size.
+    ///
+    /// \param Address - The starting address of the range.
+    /// \param Size - The size of the address range.
+    /// \param Result - The vector to fill with row indices.
+    /// \param Die - if provided, the function will check for a
+    /// DW_AT_LLVM_stmt_sequence attribute. If present, only rows from the
+    /// sequence starting at the matching offset will be added to the result.
+    ///
+    /// Returns true if any rows were found.
     bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
-                            std::vector<uint32_t> &Result) const;
+                            std::vector<uint32_t> &Result,
+                            const DWARFDie *Die = nullptr) const;
 
     bool hasFileAtIndex(uint64_t FileIndex) const {
       return Prologue.hasFileAtIndex(FileIndex);
@@ -305,8 +320,20 @@ class DWARFDebugLine {
     uint32_t lookupAddressImpl(object::SectionedAddress Address,
                                bool *IsApproximateLine = nullptr) const;
 
-    bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
-                                std::vector<uint32_t> &Result) const;
+    /// Fills the Result argument with the indices of the rows that correspond
+    /// to the address range specified by \p Address and \p Size.
+    ///
+    /// \param Address - The starting address of the range.
+    /// \param Size - The size of the address range.
+    /// \param Result - The vector to fill with row indices.
+    /// \param StmtSequenceOffset - if provided, only rows from the sequence
+    /// starting at the matching offset will be added to the result.
+    ///
+    /// Returns true if any rows were found.
+    bool
+    lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
+                           std::vector<uint32_t> &Result,
+                           std::optional<uint64_t> StmtSequenceOffset) const;
   };
 
   const LineTable *getLineTable(uint64_t Offset) const;
@@ -377,7 +404,15 @@ class DWARFDebugLine {
                  function_ref<void(Error)> ErrorHandler);
 
     void resetRowAndSequence();
-    void appendRowToMatrix();
+
+    /// Append the current Row to the LineTable's matrix of rows and update the
+    /// current Sequence information.
+    ///
+    /// \param LineTableOffset - the offset into the line table where the
+    /// current sequence of rows begins. This offset is stored in the Sequence
+    /// to allow filtering rows based on their originating sequence when a
+    /// DW_AT_LLVM_stmt_sequence attribute is present.
+    void appendRowToMatrix(uint64_t LineTableOffset);
 
     struct AddrOpIndexDelta {
       uint64_t AddrOffset;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 939a5163d55abc8..27b4c6a5489127f 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -570,8 +570,9 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() {
   Sequence.reset();
 }
 
-void DWARFDebugLine::ParsingState::appendRowToMatrix() {
+void DWARFDebugLine::ParsingState::appendRowToMatrix(uint64_t LineTableOffset) {
   unsigned RowNumber = LineTable->Rows.size();
+  Sequence.Offset = LineTableOffset;
   if (Sequence.Empty) {
     // Record the beginning of instruction sequence.
     Sequence.Empty = false;
@@ -848,6 +849,8 @@ Error DWARFDebugLine::LineTable::parse(
     *OS << '\n';
     Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
   }
+  uint64_t LineTableSeqOffset = *OffsetPtr;
+
   bool TombstonedAddress = false;
   auto EmitRow = [&] {
     if (!TombstonedAddress) {
@@ -857,7 +860,7 @@ Error DWARFDebugLine::LineTable::parse(
       }
       if (OS)
         State.Row.dump(*OS);
-      State.appendRowToMatrix();
+      State.appendRowToMatrix(LineTableSeqOffset);
     }
   };
   while (*OffsetPtr < EndOffset) {
@@ -913,6 +916,9 @@ Error DWARFDebugLine::LineTable::parse(
         // followed.
         EmitRow();
         State.resetRowAndSequence();
+        // Cursor now points to right after the end_sequence opcode - so points
+        // to the start of the next sequence - if one exists.
+        LineTableSeqOffset = Cursor.tell();
         break;
 
       case DW_LNE_set_address:
@@ -1364,10 +1370,18 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,
 
 bool DWARFDebugLine::LineTable::lookupAddressRange(
     object::SectionedAddress Address, uint64_t Size,
-    std::vector<uint32_t> &Result) const {
+    std::vector<uint32_t> &Result, const DWARFDie *Die) const {
+
+  // If DIE is provided, check for DW_AT_LLVM_stmt_sequence
+  std::optional<uint64_t> StmtSequenceOffset;
+  if (Die) {
+    if (auto StmtSeqAttr = Die->find(dwarf::DW_AT_LLVM_stmt_sequence)) {
+      StmtSequenceOffset = toSectionOffset(StmtSeqAttr);
+    }
+  }
 
   // Search for relocatable addresses
-  if (lookupAddressRangeImpl(Address, Size, Result))
+  if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
     return true;
 
   if (Address.SectionIndex == object::SectionedAddress::UndefSection)
@@ -1375,12 +1389,13 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
 
   // Search for absolute addresses
   Address.SectionIndex = object::SectionedAddress::UndefSection;
-  return lookupAddressRangeImpl(Address, Size, Result);
+  return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset);
 }
 
 bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
     object::SectionedAddress Address, uint64_t Size,
-    std::vector<uint32_t> &Result) const {
+    std::vector<uint32_t> &Result,
+    std::optional<uint64_t> StmtSequenceOffset) const {
   if (Sequences.empty())
     return false;
   uint64_t EndAddr = Address.Address + Size;
@@ -1401,6 +1416,14 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
 
   while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
     const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
+
+    // Skip sequences that don't match our stmt_sequence offset if one was
+    // provided
+    if (StmtSequenceOffset && CurSeq.Offset != *StmtSequenceOffset) {
+      ++SeqPos;
+      continue;
+    }
+
     // For the first sequence, we need to find which row in the sequence is the
     // first in our range.
     uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
@@ -1423,7 +1446,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
     ++SeqPos;
   }
 
-  return true;
+  return !Result.empty();
 }
 
 std::optional<StringRef>
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index e549128031744e6..12ec63d10449f8c 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "DwarfGenerator.h"
 #include "DwarfUtils.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Testing/Support/Error.h"
 #include "gtest/gtest.h"
@@ -2035,4 +2035,135 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
   EXPECT_THAT(Result.c_str(), MatchesRegex("a dir.b dir.b file"));
 }
 
+/// Test that lookupAddressRange correctly filters rows based on
+/// DW_AT_LLVM_stmt_sequence.
+///
+/// This test verifies that:
+/// 1. When a DIE has a DW_AT_LLVM_stmt_sequence attribute, lookupAddressRange
+/// only returns rows from the sequence starting at the specified offset
+/// 2. When a DIE has an invalid DW_AT_LLVM_stmt_sequence offset, no rows are
+/// returned
+/// 3. When no DW_AT_LLVM_stmt_sequence is present, all matching rows are
+/// returned
+///
+/// The test creates a line table with two sequences at the same address range
+/// but different line numbers. It then creates three subprogram DIEs:
+/// - One with DW_AT_LLVM_stmt_sequence pointing to the first sequence
+/// - One with DW_AT_LLVM_stmt_sequence pointing to the second sequence
+/// - One with an invalid DW_AT_LLVM_stmt_sequence offset
+TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
+  if (!setupGenerator())
+    GTEST_SKIP();
+
+  // Create new DWARF with the subprogram DIE
+  dwarfgen::CompileUnit &CU = Gen->addCompileUnit();
+  dwarfgen::DIE CUDie = CU.getUnitDIE();
+
+  CUDie.addAttribute(DW_AT_name, DW_FORM_strp, "/tmp/main.c");
+  CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);
+
+  dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram);
+  SD1.addAttribute(DW_AT_name, DW_FORM_strp, "sub1");
+  SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+  SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+  // DW_AT_LLVM_stmt_sequence points to the first sequence
+  SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e);
+
+  dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram);
+  SD2.addAttribute(DW_AT_name, DW_FORM_strp, "sub2");
+  SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+  SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+  // DW_AT_LLVM_stmt_sequence points to the second sequence
+  SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42);
+
+  dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram);
+  SD3.addAttribute(DW_AT_name, DW_FORM_strp, "sub3");
+  SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+  SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+  // Invalid DW_AT_LLVM_stmt_sequence
+  SD3.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x66);
+
+  // Create a line table with multiple sequences
+  LineTable &LT = Gen->addLineTable();
+
+  // First sequence with addresses 0x1000(Ln100), 0x1004(Ln101)
+  LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
+  LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+  LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
+  LT.addStandardOpcode(DW_LNS_copy, {});
+  LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+  LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
+
+  // Second sequence with addresses 0x1000(Ln200), 0x1004(Ln201)
+  LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
+  LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+  LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
+  LT.addStandardOpcode(DW_LNS_copy, {});
+  LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+  LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
+
+  // Generate the DWARF
+  generate();
+
+  // Parse the line table to get the sequence offset
+  auto ExpectedLineTable = Line.getOrParseLineTable(
+      LineData, /*Offset=*/0, *Context, nullptr, RecordRecoverable);
+  ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
+  const auto *Table = *ExpectedLineTable;
+
+  uint32_t NumCUs = Context->getNumCompileUnits();
+  ASSERT_EQ(NumCUs, 1u);
+  DWARFUnit *Unit = Context->getUnitAtIndex(0);
+  auto DwarfCUDie = Unit->getUnitDIE(false);
+
+  auto Sub1Die = DwarfCUDie.getFirstChild();
+  auto Sub2Die = Sub1Die.getSibling();
+  auto Sub3Die = Sub2Die.getSibling();
+
+  // Verify Sub1Die is the DIE generated from SD1
+  auto NameAttr1 = Sub1Die.find(DW_AT_name);
+  EXPECT_STREQ(*dwarf::toString(*NameAttr1), "sub1");
+
+  // Verify Sub2Die is the DIE generated from SD2
+  auto NameAttr2 = Sub2Die.find(DW_AT_name);
+  EXPECT_STREQ(*dwarf::toString(*NameAttr2), "sub2");
+
+  // Verify Sub2Die is the DIE generated from SD3
+  auto NameAttr3 = Sub3Die.find(DW_AT_name);
+  EXPECT_STREQ(*dwarf::toString(*NameAttr3), "sub3");
+
+  // Ensure there are two sequences
+  ASSERT_EQ(Table->Sequences.size(), 2u);
+
+  // Lookup addresses in the first sequence with the second sequence's filter
+  {
+    std::vector<uint32_t> Rows;
+    bool Found;
+
+    // Look up using Sub3Die, which has an invalid DW_AT_LLVM_stmt_sequence
+    Found = Table->lookupAddressRange(
+        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+        &Sub3Die);
+    EXPECT_FALSE(Found);
+
+    // Look up using Sub1Die, which has a valid DW_AT_LLVM_stmt_sequence and
+    // should return row 0
+    Found = Table->lookupAddressRange(
+        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+        &Sub1Die);
+    EXPECT_TRUE(Found);
+    ASSERT_EQ(Rows.size(), 1u);
+    EXPECT_EQ(Rows[0], 0);
+
+    // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and
+    // should return row 3
+    Rows.clear();
+    Found = Table->lookupAddressRange(
+        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+        &Sub2Die);
+    EXPECT_TRUE(Found);
+    ASSERT_EQ(Rows.size(), 1u);
+    EXPECT_EQ(Rows[0], 3);
+  }
+}
 } // end anonymous namespace

>From ad1d1d0a606f35466c8fdf74a00d7363d2a50442 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Tue, 21 Jan 2025 15:39:36 -0800
Subject: [PATCH 2/7] Address Comments #1

---
 .../llvm/DebugInfo/DWARF/DWARFDebugLine.h     | 16 +++++----------
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp   | 20 +++++++++++--------
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 732a4bfc28ab3a3..734a00801457bd6 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -210,7 +210,7 @@ class DWARFDebugLine {
     bool Empty;
 
     /// The offset into the line table where this sequence begins
-    uint64_t Offset = 0;
+    uint64_t StmtSeqOffset = UINT64_MAX;
 
     void reset();
 
@@ -227,6 +227,8 @@ class DWARFDebugLine {
       return SectionIndex == PC.SectionIndex &&
              (LowPC <= PC.Address && PC.Address < HighPC);
     }
+
+    void SetSequenceOffset(uint64_t Offset) { StmtSeqOffset = Offset; }
   };
 
   struct LineTable {
@@ -403,16 +405,8 @@ class DWARFDebugLine {
     ParsingState(struct LineTable *LT, uint64_t TableOffset,
                  function_ref<void(Error)> ErrorHandler);
 
-    void resetRowAndSequence();
-
-    /// Append the current Row to the LineTable's matrix of rows and update the
-    /// current Sequence information.
-    ///
-    /// \param LineTableOffset - the offset into the line table where the
-    /// current sequence of rows begins. This offset is stored in the Sequence
-    /// to allow filtering rows based on their originating sequence when a
-    /// DW_AT_LLVM_stmt_sequence attribute is present.
-    void appendRowToMatrix(uint64_t LineTableOffset);
+    void resetRowAndSequence(uint64_t Offset = UINT64_MAX);
+    void appendRowToMatrix();
 
     struct AddrOpIndexDelta {
       uint64_t AddrOffset;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 27b4c6a5489127f..dd91ef04eca83f9 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -531,6 +531,7 @@ void DWARFDebugLine::Sequence::reset() {
   FirstRowIndex = 0;
   LastRowIndex = 0;
   Empty = true;
+  StmtSeqOffset = UINT64_MAX;
 }
 
 DWARFDebugLine::LineTable::LineTable() { clear(); }
@@ -565,14 +566,16 @@ DWARFDebugLine::ParsingState::ParsingState(
   resetRowAndSequence();
 }
 
-void DWARFDebugLine::ParsingState::resetRowAndSequence() {
+void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
   Row.reset(LineTable->Prologue.DefaultIsStmt);
   Sequence.reset();
+  if (Offset != UINT64_MAX) {
+    Sequence.SetSequenceOffset(Offset);
+  }
 }
 
-void DWARFDebugLine::ParsingState::appendRowToMatrix(uint64_t LineTableOffset) {
+void DWARFDebugLine::ParsingState::appendRowToMatrix() {
   unsigned RowNumber = LineTable->Rows.size();
-  Sequence.Offset = LineTableOffset;
   if (Sequence.Empty) {
     // Record the beginning of instruction sequence.
     Sequence.Empty = false;
@@ -849,7 +852,9 @@ Error DWARFDebugLine::LineTable::parse(
     *OS << '\n';
     Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
   }
-  uint64_t LineTableSeqOffset = *OffsetPtr;
+  // *OffsetPtr points to the end of the prologue - i.e. the start of the first
+  // sequence. So initialize the first sequence offset accordingly.
+  State.Sequence.SetSequenceOffset(*OffsetPtr);
 
   bool TombstonedAddress = false;
   auto EmitRow = [&] {
@@ -860,7 +865,7 @@ Error DWARFDebugLine::LineTable::parse(
       }
       if (OS)
         State.Row.dump(*OS);
-      State.appendRowToMatrix(LineTableSeqOffset);
+      State.appendRowToMatrix();
     }
   };
   while (*OffsetPtr < EndOffset) {
@@ -915,10 +920,9 @@ Error DWARFDebugLine::LineTable::parse(
         // into this code path - if it were invalid, the default case would be
         // followed.
         EmitRow();
-        State.resetRowAndSequence();
         // Cursor now points to right after the end_sequence opcode - so points
         // to the start of the next sequence - if one exists.
-        LineTableSeqOffset = Cursor.tell();
+        State.resetRowAndSequence(Cursor.tell());
         break;
 
       case DW_LNE_set_address:
@@ -1419,7 +1423,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
 
     // Skip sequences that don't match our stmt_sequence offset if one was
     // provided
-    if (StmtSequenceOffset && CurSeq.Offset != *StmtSequenceOffset) {
+    if (StmtSequenceOffset && CurSeq.StmtSeqOffset != *StmtSequenceOffset) {
       ++SeqPos;
       continue;
     }

>From cc8df97c0619b1456a371e132642f1c08e7a7dba Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Tue, 21 Jan 2025 17:58:51 -0800
Subject: [PATCH 3/7] Fix failing test

---
 .../unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index 12ec63d10449f8c..002e105daeb9234 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -2059,25 +2059,25 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
   dwarfgen::CompileUnit &CU = Gen->addCompileUnit();
   dwarfgen::DIE CUDie = CU.getUnitDIE();
 
-  CUDie.addAttribute(DW_AT_name, DW_FORM_strp, "/tmp/main.c");
+  CUDie.addAttribute(DW_AT_name, DW_FORM_string, "/tmp/main.c");
   CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);
 
   dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram);
-  SD1.addAttribute(DW_AT_name, DW_FORM_strp, "sub1");
+  SD1.addAttribute(DW_AT_name, DW_FORM_string, "sub1");
   SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
   SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
   // DW_AT_LLVM_stmt_sequence points to the first sequence
   SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e);
 
   dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram);
-  SD2.addAttribute(DW_AT_name, DW_FORM_strp, "sub2");
+  SD2.addAttribute(DW_AT_name, DW_FORM_string, "sub2");
   SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
   SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
   // DW_AT_LLVM_stmt_sequence points to the second sequence
   SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42);
 
   dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram);
-  SD3.addAttribute(DW_AT_name, DW_FORM_strp, "sub3");
+  SD3.addAttribute(DW_AT_name, DW_FORM_string, "sub3");
   SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
   SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
   // Invalid DW_AT_LLVM_stmt_sequence
@@ -2153,7 +2153,7 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
         &Sub1Die);
     EXPECT_TRUE(Found);
     ASSERT_EQ(Rows.size(), 1u);
-    EXPECT_EQ(Rows[0], 0);
+    EXPECT_EQ(Rows[0], 0U);
 
     // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and
     // should return row 3
@@ -2163,7 +2163,7 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
         &Sub2Die);
     EXPECT_TRUE(Found);
     ASSERT_EQ(Rows.size(), 1u);
-    EXPECT_EQ(Rows[0], 3);
+    EXPECT_EQ(Rows[0], 3u);
   }
 }
 } // end anonymous namespace

>From 88a816f2971d4c90a00a8a06cdf7bc77d539b569 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Thu, 23 Jan 2025 15:19:01 -0800
Subject: [PATCH 4/7] Dont pass DIE, pass offset

---
 .../llvm/DebugInfo/DWARF/DWARFDebugLine.h     | 12 +++++------
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp   | 11 ++--------
 .../DebugInfo/DWARF/DWARFDebugLineTest.cpp    | 20 +++++++++++--------
 3 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 734a00801457bd6..5694cbdde6fdbe4 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -254,14 +254,14 @@ class DWARFDebugLine {
     /// \param Address - The starting address of the range.
     /// \param Size - The size of the address range.
     /// \param Result - The vector to fill with row indices.
-    /// \param Die - if provided, the function will check for a
-    /// DW_AT_LLVM_stmt_sequence attribute. If present, only rows from the
-    /// sequence starting at the matching offset will be added to the result.
+    /// \param StmtSequenceOffset - if provided, only rows from the sequence
+    /// starting at the matching offset will be added to the result.
     ///
     /// Returns true if any rows were found.
-    bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
-                            std::vector<uint32_t> &Result,
-                            const DWARFDie *Die = nullptr) const;
+    bool lookupAddressRange(
+        object::SectionedAddress Address, uint64_t Size,
+        std::vector<uint32_t> &Result,
+        std::optional<uint64_t> StmtSequenceOffset = std::nullopt) const;
 
     bool hasFileAtIndex(uint64_t FileIndex) const {
       return Prologue.hasFileAtIndex(FileIndex);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index dd91ef04eca83f9..eb5181276a37a44 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1374,15 +1374,8 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,
 
 bool DWARFDebugLine::LineTable::lookupAddressRange(
     object::SectionedAddress Address, uint64_t Size,
-    std::vector<uint32_t> &Result, const DWARFDie *Die) const {
-
-  // If DIE is provided, check for DW_AT_LLVM_stmt_sequence
-  std::optional<uint64_t> StmtSequenceOffset;
-  if (Die) {
-    if (auto StmtSeqAttr = Die->find(dwarf::DW_AT_LLVM_stmt_sequence)) {
-      StmtSequenceOffset = toSectionOffset(StmtSeqAttr);
-    }
-  }
+    std::vector<uint32_t> &Result,
+    std::optional<uint64_t> StmtSequenceOffset) const {
 
   // Search for relocatable addresses
   if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index 002e105daeb9234..4c0116ad32b1fd3 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -2140,27 +2140,31 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
     std::vector<uint32_t> Rows;
     bool Found;
 
-    // Look up using Sub3Die, which has an invalid DW_AT_LLVM_stmt_sequence
+    // Look up using Sub3Die's invalid stmt_sequence offset
+    auto StmtSeqAttr3 = Sub3Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
+    ASSERT_TRUE(StmtSeqAttr3);
     Found = Table->lookupAddressRange(
         {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
-        &Sub3Die);
+        toSectionOffset(StmtSeqAttr3));
     EXPECT_FALSE(Found);
 
-    // Look up using Sub1Die, which has a valid DW_AT_LLVM_stmt_sequence and
-    // should return row 0
+    // Look up using Sub1Die's valid stmt_sequence offset
+    auto StmtSeqAttr1 = Sub1Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
+    ASSERT_TRUE(StmtSeqAttr1);
     Found = Table->lookupAddressRange(
         {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
-        &Sub1Die);
+        toSectionOffset(StmtSeqAttr1));
     EXPECT_TRUE(Found);
     ASSERT_EQ(Rows.size(), 1u);
     EXPECT_EQ(Rows[0], 0U);
 
-    // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and
-    // should return row 3
+    // Look up using Sub2Die's valid stmt_sequence offset
     Rows.clear();
+    auto StmtSeqAttr2 = Sub2Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
+    ASSERT_TRUE(StmtSeqAttr2);
     Found = Table->lookupAddressRange(
         {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
-        &Sub2Die);
+        toSectionOffset(StmtSeqAttr2));
     EXPECT_TRUE(Found);
     ASSERT_EQ(Rows.size(), 1u);
     EXPECT_EQ(Rows[0], 3u);

>From 7aa8caeec4133979313ddbc34c4f1a87aa0df81d Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Wed, 29 Jan 2025 05:28:23 -0800
Subject: [PATCH 5/7] Address feedback nr.2

---
 llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h |  2 +-
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp        | 10 +++-------
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 5694cbdde6fdbe4..b1bfc139119afe5 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -405,7 +405,7 @@ class DWARFDebugLine {
     ParsingState(struct LineTable *LT, uint64_t TableOffset,
                  function_ref<void(Error)> ErrorHandler);
 
-    void resetRowAndSequence(uint64_t Offset = UINT64_MAX);
+    void resetRowAndSequence(uint64_t Offset);
     void appendRowToMatrix();
 
     struct AddrOpIndexDelta {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index eb5181276a37a44..ebc1e28fa0294b2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -562,16 +562,12 @@ void DWARFDebugLine::LineTable::clear() {
 DWARFDebugLine::ParsingState::ParsingState(
     struct LineTable *LT, uint64_t TableOffset,
     function_ref<void(Error)> ErrorHandler)
-    : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {
-  resetRowAndSequence();
-}
+    : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {}
 
 void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
   Row.reset(LineTable->Prologue.DefaultIsStmt);
   Sequence.reset();
-  if (Offset != UINT64_MAX) {
-    Sequence.SetSequenceOffset(Offset);
-  }
+  Sequence.SetSequenceOffset(Offset);
 }
 
 void DWARFDebugLine::ParsingState::appendRowToMatrix() {
@@ -854,7 +850,7 @@ Error DWARFDebugLine::LineTable::parse(
   }
   // *OffsetPtr points to the end of the prologue - i.e. the start of the first
   // sequence. So initialize the first sequence offset accordingly.
-  State.Sequence.SetSequenceOffset(*OffsetPtr);
+  State.resetRowAndSequence(*OffsetPtr);
 
   bool TombstonedAddress = false;
   auto EmitRow = [&] {

>From 16e8e204ac986a45f7778e624e05e19e44224098 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Tue, 4 Feb 2025 09:07:55 -0800
Subject: [PATCH 6/7] Address Feedback Nr.3

---
 .../llvm/DebugInfo/DWARF/DWARFDebugLine.h     |   2 -
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp   |  99 +++++++----
 .../DebugInfo/DWARF/DWARFDebugLineTest.cpp    | 160 ++++++++----------
 3 files changed, 136 insertions(+), 125 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index b1bfc139119afe5..61f17a27b3d286c 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -227,8 +227,6 @@ class DWARFDebugLine {
       return SectionIndex == PC.SectionIndex &&
              (LowPC <= PC.Address && PC.Address < HighPC);
     }
-
-    void SetSequenceOffset(uint64_t Offset) { StmtSeqOffset = Offset; }
   };
 
   struct LineTable {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index ebc1e28fa0294b2..606635da2fec6dd 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -567,7 +567,7 @@ DWARFDebugLine::ParsingState::ParsingState(
 void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
   Row.reset(LineTable->Prologue.DefaultIsStmt);
   Sequence.reset();
-  Sequence.SetSequenceOffset(Offset);
+  Sequence.StmtSeqOffset = Offset;
 }
 
 void DWARFDebugLine::ParsingState::appendRowToMatrix() {
@@ -1391,51 +1391,86 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
     std::optional<uint64_t> StmtSequenceOffset) const {
   if (Sequences.empty())
     return false;
-  uint64_t EndAddr = Address.Address + Size;
-  // First, find an instruction sequence containing the given address.
-  DWARFDebugLine::Sequence Sequence;
-  Sequence.SectionIndex = Address.SectionIndex;
-  Sequence.HighPC = Address.Address;
-  SequenceIter LastSeq = Sequences.end();
-  SequenceIter SeqPos = llvm::upper_bound(
-      Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC);
-  if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
-    return false;
 
-  SequenceIter StartPos = SeqPos;
+  const uint64_t EndAddr = Address.Address + Size;
 
-  // Add the rows from the first sequence to the vector, starting with the
-  // index we just calculated
-
-  while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
-    const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
-
-    // Skip sequences that don't match our stmt_sequence offset if one was
-    // provided
-    if (StmtSequenceOffset && CurSeq.StmtSeqOffset != *StmtSequenceOffset) {
-      ++SeqPos;
-      continue;
-    }
+  // Helper: Given a sequence and a starting address, find the subset
+  // of rows within [Address, EndAddr) and append them to `Result`.
+  auto addRowsFromSequence = [&](const Sequence &Seq,
+                                 object::SectionedAddress StartAddr,
+                                 uint64_t EndAddress, bool IsFirstSequence) {
+    // If this sequence does not intersect our [StartAddr, EndAddress) range,
+    // do nothing.
+    if (Seq.HighPC <= StartAddr.Address || Seq.LowPC >= EndAddress)
+      return;
 
-    // For the first sequence, we need to find which row in the sequence is the
-    // first in our range.
-    uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
-    if (SeqPos == StartPos)
-      FirstRowIndex = findRowInSeq(CurSeq, Address);
+    // For the "first" sequence in the search, we must figure out which row
+    // actually starts within our address range.
+    uint32_t FirstRowIndex = Seq.FirstRowIndex;
+    if (IsFirstSequence)
+      FirstRowIndex = findRowInSeq(Seq, StartAddr);
 
-    // Figure out the last row in the range.
+    // Similarly, compute the last row that is within [StartAddr, EndAddress).
     uint32_t LastRowIndex =
-        findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex});
+        findRowInSeq(Seq, {EndAddress - 1, StartAddr.SectionIndex});
     if (LastRowIndex == UnknownRowIndex)
-      LastRowIndex = CurSeq.LastRowIndex - 1;
+      LastRowIndex = Seq.LastRowIndex - 1;
 
     assert(FirstRowIndex != UnknownRowIndex);
     assert(LastRowIndex != UnknownRowIndex);
 
+    // Append all row indices in [FirstRowIndex, LastRowIndex].
     for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) {
       Result.push_back(I);
     }
+  };
+
+  // If a stmt_sequence_offset was provided, do a binary search to find the
+  // single sequence that matches that offset. Then add only its relevant rows.
+  if (StmtSequenceOffset) {
+    // Binary-search the Sequences by their StmtSeqOffset:
+    auto It = llvm::lower_bound(Sequences, *StmtSequenceOffset,
+                                [](const Sequence &Seq, uint64_t OffsetVal) {
+                                  return Seq.StmtSeqOffset < OffsetVal;
+                                });
+
+    // If not found or mismatched, there’s no match to return.
+    if (It == Sequences.end() || It->StmtSeqOffset != *StmtSequenceOffset)
+      return false;
+
+    // Now add rows from the discovered sequence if it intersects [Address,
+    // EndAddr).
+    addRowsFromSequence(*It, Address, EndAddr, /*IsFirstSequence=*/true);
+    return !Result.empty();
+  }
 
+  // Otherwise, fall back to logic of walking sequences by Address.
+  // We first find a sequence containing `Address` (via upper_bound on HighPC),
+  // then proceed forward through overlapping sequences in ascending order.
+
+  // Construct a dummy Sequence to find where `Address` fits by HighPC.
+  DWARFDebugLine::Sequence SearchSeq;
+  SearchSeq.SectionIndex = Address.SectionIndex;
+  SearchSeq.HighPC = Address.Address;
+
+  auto LastSeq = Sequences.end();
+  auto SeqPos = llvm::upper_bound(Sequences, SearchSeq,
+                                  DWARFDebugLine::Sequence::orderByHighPC);
+  if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
+    return false;
+
+  // This marks the first sequence we found that might contain Address.
+  const auto StartPos = SeqPos;
+
+  // Walk forward until sequences no longer intersect [Address, EndAddr).
+  while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
+    // Add rows only if the sequence is in the same section:
+    if (SeqPos->SectionIndex == Address.SectionIndex) {
+      // For the very first sequence, we need the row that lines up with
+      // `Address`.
+      bool IsFirst = (SeqPos == StartPos);
+      addRowsFromSequence(*SeqPos, Address, EndAddr, IsFirst);
+    }
     ++SeqPos;
   }
 
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index 4c0116ad32b1fd3..2fe52600df923bf 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -2036,138 +2036,116 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
 }
 
 /// Test that lookupAddressRange correctly filters rows based on
-/// DW_AT_LLVM_stmt_sequence.
+/// a statement-sequence offset (simulating DW_AT_LLVM_stmt_sequence).
 ///
 /// This test verifies that:
-/// 1. When a DIE has a DW_AT_LLVM_stmt_sequence attribute, lookupAddressRange
-/// only returns rows from the sequence starting at the specified offset
-/// 2. When a DIE has an invalid DW_AT_LLVM_stmt_sequence offset, no rows are
-/// returned
-/// 3. When no DW_AT_LLVM_stmt_sequence is present, all matching rows are
-/// returned
+/// 1. When a statement-sequence offset is provided, lookupAddressRange
+///    only returns rows from the sequence starting at that offset.
+/// 2. When an invalid statement-sequence offset is provided, no rows
+///    are returned.
+/// 3. When no statement-sequence offset is provided, all matching rows
+///    in the table are returned.
 ///
-/// The test creates a line table with two sequences at the same address range
-/// but different line numbers. It then creates three subprogram DIEs:
-/// - One with DW_AT_LLVM_stmt_sequence pointing to the first sequence
-/// - One with DW_AT_LLVM_stmt_sequence pointing to the second sequence
-/// - One with an invalid DW_AT_LLVM_stmt_sequence offset
+/// We build a line table with two sequences at the same address range
+/// but different line numbers. Then we try lookups with various statement-
+/// sequence offsets to check the filtering logic.
 TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
   if (!setupGenerator())
     GTEST_SKIP();
 
-  // Create new DWARF with the subprogram DIE
-  dwarfgen::CompileUnit &CU = Gen->addCompileUnit();
-  dwarfgen::DIE CUDie = CU.getUnitDIE();
-
-  CUDie.addAttribute(DW_AT_name, DW_FORM_string, "/tmp/main.c");
-  CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);
-
-  dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram);
-  SD1.addAttribute(DW_AT_name, DW_FORM_string, "sub1");
-  SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
-  SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
-  // DW_AT_LLVM_stmt_sequence points to the first sequence
-  SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e);
-
-  dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram);
-  SD2.addAttribute(DW_AT_name, DW_FORM_string, "sub2");
-  SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
-  SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
-  // DW_AT_LLVM_stmt_sequence points to the second sequence
-  SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42);
-
-  dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram);
-  SD3.addAttribute(DW_AT_name, DW_FORM_string, "sub3");
-  SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
-  SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
-  // Invalid DW_AT_LLVM_stmt_sequence
-  SD3.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x66);
-
-  // Create a line table with multiple sequences
+  // Create a line table that has two sequences covering [0x1000, 0x1004).
+  // Each sequence has two rows: addresses at 0x1000 and 0x1004, but
+  // they differ by line numbers (100 vs. 200, etc.).
+  //
+  // We'll pretend the first sequence starts at offset 0x2e in the line table,
+  // the second at 0x42, and we'll also test an invalid offset 0x66.
+
   LineTable &LT = Gen->addLineTable();
 
-  // First sequence with addresses 0x1000(Ln100), 0x1004(Ln101)
+  // First sequence at offset 0x2e: addresses 0x1000(Ln=100), 0x1004(Ln=101)
   LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
   LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+  // Advance the line register by 99 (so line=100) and copy.
   LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
   LT.addStandardOpcode(DW_LNS_copy, {});
-  LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+  // 0x4b is a special opcode: address += 4, line += 1 (so line=101).
+  LT.addByte(0x4b);
+  // End this sequence.
   LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
 
-  // Second sequence with addresses 0x1000(Ln200), 0x1004(Ln201)
+  // Second sequence at offset 0x42: addresses 0x1000(Ln=200), 0x1004(Ln=201)
   LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
   LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
   LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
   LT.addStandardOpcode(DW_LNS_copy, {});
-  LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+  // 0x4b again: address += 4, line += 1 (so line=201).
+  LT.addByte(0x4b);
+  // End this second sequence.
   LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
 
-  // Generate the DWARF
+  // Generate the DWARF data.
   generate();
 
-  // Parse the line table to get the sequence offset
-  auto ExpectedLineTable = Line.getOrParseLineTable(
-      LineData, /*Offset=*/0, *Context, nullptr, RecordRecoverable);
+  // Parse the line table.
+  auto ExpectedLineTable =
+      Line.getOrParseLineTable(LineData, /*Offset=*/0, *Context,
+                               /*DwarfUnit=*/nullptr, RecordRecoverable);
   ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
   const auto *Table = *ExpectedLineTable;
 
-  uint32_t NumCUs = Context->getNumCompileUnits();
-  ASSERT_EQ(NumCUs, 1u);
-  DWARFUnit *Unit = Context->getUnitAtIndex(0);
-  auto DwarfCUDie = Unit->getUnitDIE(false);
-
-  auto Sub1Die = DwarfCUDie.getFirstChild();
-  auto Sub2Die = Sub1Die.getSibling();
-  auto Sub3Die = Sub2Die.getSibling();
-
-  // Verify Sub1Die is the DIE generated from SD1
-  auto NameAttr1 = Sub1Die.find(DW_AT_name);
-  EXPECT_STREQ(*dwarf::toString(*NameAttr1), "sub1");
-
-  // Verify Sub2Die is the DIE generated from SD2
-  auto NameAttr2 = Sub2Die.find(DW_AT_name);
-  EXPECT_STREQ(*dwarf::toString(*NameAttr2), "sub2");
-
-  // Verify Sub2Die is the DIE generated from SD3
-  auto NameAttr3 = Sub3Die.find(DW_AT_name);
-  EXPECT_STREQ(*dwarf::toString(*NameAttr3), "sub3");
-
-  // Ensure there are two sequences
+  // The table should have two sequences, each starting at our chosen offsets.
   ASSERT_EQ(Table->Sequences.size(), 2u);
 
-  // Lookup addresses in the first sequence with the second sequence's filter
+  // 1) Try looking up with an invalid offset (simulating an invalid
+  //    DW_AT_LLVM_stmt_sequence). We expect no rows.
   {
     std::vector<uint32_t> Rows;
-    bool Found;
-
-    // Look up using Sub3Die's invalid stmt_sequence offset
-    auto StmtSeqAttr3 = Sub3Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
-    ASSERT_TRUE(StmtSeqAttr3);
-    Found = Table->lookupAddressRange(
+    bool Found = Table->lookupAddressRange(
         {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
-        toSectionOffset(StmtSeqAttr3));
+        /*StmtSequenceOffset=*/0x66); // invalid offset
     EXPECT_FALSE(Found);
+    EXPECT_TRUE(Rows.empty());
+  }
 
-    // Look up using Sub1Die's valid stmt_sequence offset
-    auto StmtSeqAttr1 = Sub1Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
-    ASSERT_TRUE(StmtSeqAttr1);
-    Found = Table->lookupAddressRange(
+  // 2) Look up using the offset 0x2e (our first sequence). We expect
+  //    to get the rows from that sequence only (which for 0x1000 is row #0).
+  {
+    std::vector<uint32_t> Rows;
+    bool Found = Table->lookupAddressRange(
         {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
-        toSectionOffset(StmtSeqAttr1));
+        /*StmtSequenceOffset=*/0x2e);
     EXPECT_TRUE(Found);
     ASSERT_EQ(Rows.size(), 1u);
-    EXPECT_EQ(Rows[0], 0U);
+    // The first sequence's first row is index 0.
+    EXPECT_EQ(Rows[0], 0u);
+  }
 
-    // Look up using Sub2Die's valid stmt_sequence offset
-    Rows.clear();
-    auto StmtSeqAttr2 = Sub2Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
-    ASSERT_TRUE(StmtSeqAttr2);
-    Found = Table->lookupAddressRange(
+  // 3) Look up using the offset 0x42 (second sequence). For address 0x1000
+  //    in that second sequence, we should see row #2.
+  {
+    std::vector<uint32_t> Rows;
+    bool Found = Table->lookupAddressRange(
         {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
-        toSectionOffset(StmtSeqAttr2));
+        /*StmtSequenceOffset=*/0x42);
     EXPECT_TRUE(Found);
     ASSERT_EQ(Rows.size(), 1u);
+    // The second sequence's first row is index 2 in the table.
     EXPECT_EQ(Rows[0], 3u);
   }
+
+  // 4) Look up with no statement-sequence offset specified.
+  //    We should get rows from both sequences for address 0x1000.
+  {
+    std::vector<uint32_t> Rows;
+    bool Found = Table->lookupAddressRange(
+        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+        std::nullopt /* no filter */);
+    EXPECT_TRUE(Found);
+    // The first sequence's row is #0, second's row is #2, so both should
+    // appear.
+    ASSERT_EQ(Rows.size(), 2u);
+    EXPECT_EQ(Rows[0], 0u);
+    EXPECT_EQ(Rows[1], 3u);
+  }
 }
 } // end anonymous namespace

>From 569bbe4b08b4a673fca6b26b1eb6a624f22bf080 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Wed, 5 Feb 2025 16:03:17 -0800
Subject: [PATCH 7/7] Address Feedback Nr.4

---
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 111 ++++++++------------
 1 file changed, 44 insertions(+), 67 deletions(-)

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 606635da2fec6dd..267958a8602ff04 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1391,90 +1391,67 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
     std::optional<uint64_t> StmtSequenceOffset) const {
   if (Sequences.empty())
     return false;
+  uint64_t EndAddr = Address.Address + Size;
+  // First, find an instruction sequence containing the given address.
+  DWARFDebugLine::Sequence Sequence;
+  Sequence.SectionIndex = Address.SectionIndex;
+  Sequence.HighPC = Address.Address;
+  SequenceIter LastSeq = Sequences.end();
+  SequenceIter SeqPos;
 
-  const uint64_t EndAddr = Address.Address + Size;
+  if (StmtSequenceOffset) {
+    // If we have a statement sequence offset, find the specific sequence
+    // Binary search for sequence with matching StmtSeqOffset
+    Sequence.StmtSeqOffset = *StmtSequenceOffset;
+    SeqPos = std::lower_bound(Sequences.begin(), LastSeq, Sequence,
+                              [](const DWARFDebugLine::Sequence &LHS,
+                                 const DWARFDebugLine::Sequence &RHS) {
+                                return LHS.StmtSeqOffset < RHS.StmtSeqOffset;
+                              });
+
+    // If sequence not found or doesn't contain the address, return false
+    if (SeqPos == LastSeq || SeqPos->StmtSeqOffset != *StmtSequenceOffset ||
+        !SeqPos->containsPC(Address))
+      return false;
 
-  // Helper: Given a sequence and a starting address, find the subset
-  // of rows within [Address, EndAddr) and append them to `Result`.
-  auto addRowsFromSequence = [&](const Sequence &Seq,
-                                 object::SectionedAddress StartAddr,
-                                 uint64_t EndAddress, bool IsFirstSequence) {
-    // If this sequence does not intersect our [StartAddr, EndAddress) range,
-    // do nothing.
-    if (Seq.HighPC <= StartAddr.Address || Seq.LowPC >= EndAddress)
-      return;
+    // Set LastSeq to just past this sequence since we only want this one
+    LastSeq = std::next(SeqPos);
+  } else {
+    // No specific sequence requested, find first sequence containing address
+    SeqPos = std::upper_bound(Sequences.begin(), LastSeq, Sequence,
+                              DWARFDebugLine::Sequence::orderByHighPC);
+    if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
+      return false;
+  }
 
-    // For the "first" sequence in the search, we must figure out which row
-    // actually starts within our address range.
-    uint32_t FirstRowIndex = Seq.FirstRowIndex;
-    if (IsFirstSequence)
-      FirstRowIndex = findRowInSeq(Seq, StartAddr);
+  SequenceIter StartPos = SeqPos;
 
-    // Similarly, compute the last row that is within [StartAddr, EndAddress).
+  // Process sequences that overlap with the desired range
+  while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
+    const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
+    // For the first sequence, we need to find which row in the sequence is the
+    // first in our range.
+    uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
+    if (SeqPos == StartPos)
+      FirstRowIndex = findRowInSeq(CurSeq, Address);
+
+    // Figure out the last row in the range.
     uint32_t LastRowIndex =
-        findRowInSeq(Seq, {EndAddress - 1, StartAddr.SectionIndex});
+        findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex});
     if (LastRowIndex == UnknownRowIndex)
-      LastRowIndex = Seq.LastRowIndex - 1;
+      LastRowIndex = CurSeq.LastRowIndex - 1;
 
     assert(FirstRowIndex != UnknownRowIndex);
     assert(LastRowIndex != UnknownRowIndex);
 
-    // Append all row indices in [FirstRowIndex, LastRowIndex].
     for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) {
       Result.push_back(I);
     }
-  };
 
-  // If a stmt_sequence_offset was provided, do a binary search to find the
-  // single sequence that matches that offset. Then add only its relevant rows.
-  if (StmtSequenceOffset) {
-    // Binary-search the Sequences by their StmtSeqOffset:
-    auto It = llvm::lower_bound(Sequences, *StmtSequenceOffset,
-                                [](const Sequence &Seq, uint64_t OffsetVal) {
-                                  return Seq.StmtSeqOffset < OffsetVal;
-                                });
-
-    // If not found or mismatched, there’s no match to return.
-    if (It == Sequences.end() || It->StmtSeqOffset != *StmtSequenceOffset)
-      return false;
-
-    // Now add rows from the discovered sequence if it intersects [Address,
-    // EndAddr).
-    addRowsFromSequence(*It, Address, EndAddr, /*IsFirstSequence=*/true);
-    return !Result.empty();
-  }
-
-  // Otherwise, fall back to logic of walking sequences by Address.
-  // We first find a sequence containing `Address` (via upper_bound on HighPC),
-  // then proceed forward through overlapping sequences in ascending order.
-
-  // Construct a dummy Sequence to find where `Address` fits by HighPC.
-  DWARFDebugLine::Sequence SearchSeq;
-  SearchSeq.SectionIndex = Address.SectionIndex;
-  SearchSeq.HighPC = Address.Address;
-
-  auto LastSeq = Sequences.end();
-  auto SeqPos = llvm::upper_bound(Sequences, SearchSeq,
-                                  DWARFDebugLine::Sequence::orderByHighPC);
-  if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
-    return false;
-
-  // This marks the first sequence we found that might contain Address.
-  const auto StartPos = SeqPos;
-
-  // Walk forward until sequences no longer intersect [Address, EndAddr).
-  while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
-    // Add rows only if the sequence is in the same section:
-    if (SeqPos->SectionIndex == Address.SectionIndex) {
-      // For the very first sequence, we need the row that lines up with
-      // `Address`.
-      bool IsFirst = (SeqPos == StartPos);
-      addRowsFromSequence(*SeqPos, Address, EndAddr, IsFirst);
-    }
     ++SeqPos;
   }
 
-  return !Result.empty();
+  return true;
 }
 
 std::optional<StringRef>



More information about the llvm-commits mailing list