[llvm] [DebugInfo][DWARF] Utilize DW_AT_LLVM_stmt_sequence attr in line table lookups (PR #123391)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 17:28:46 PST 2025
https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/123391
>From 658a02c24f452dd70df35e44444c90a696cc3f65 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Fri, 17 Jan 2025 11:33:20 -0800
Subject: [PATCH 1/8] [DebugInfo] Add option to use DW_AT_LLVM_stmt_sequence in
line table lookups
---
.../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 43 +++++-
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 37 ++++-
.../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 133 +++++++++++++++++-
3 files changed, 201 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index ff7bf87d8e6b5a8..732a4bfc28ab3a3 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -209,6 +209,9 @@ class DWARFDebugLine {
unsigned LastRowIndex;
bool Empty;
+ /// The offset into the line table where this sequence begins
+ uint64_t Offset = 0;
+
void reset();
static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
@@ -243,8 +246,20 @@ class DWARFDebugLine {
uint32_t lookupAddress(object::SectionedAddress Address,
bool *IsApproximateLine = nullptr) const;
+ /// Fills the Result argument with the indices of the rows that correspond
+ /// to the address range specified by \p Address and \p Size.
+ ///
+ /// \param Address - The starting address of the range.
+ /// \param Size - The size of the address range.
+ /// \param Result - The vector to fill with row indices.
+ /// \param Die - if provided, the function will check for a
+ /// DW_AT_LLVM_stmt_sequence attribute. If present, only rows from the
+ /// sequence starting at the matching offset will be added to the result.
+ ///
+ /// Returns true if any rows were found.
bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result) const;
+ std::vector<uint32_t> &Result,
+ const DWARFDie *Die = nullptr) const;
bool hasFileAtIndex(uint64_t FileIndex) const {
return Prologue.hasFileAtIndex(FileIndex);
@@ -305,8 +320,20 @@ class DWARFDebugLine {
uint32_t lookupAddressImpl(object::SectionedAddress Address,
bool *IsApproximateLine = nullptr) const;
- bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result) const;
+ /// Fills the Result argument with the indices of the rows that correspond
+ /// to the address range specified by \p Address and \p Size.
+ ///
+ /// \param Address - The starting address of the range.
+ /// \param Size - The size of the address range.
+ /// \param Result - The vector to fill with row indices.
+ /// \param StmtSequenceOffset - if provided, only rows from the sequence
+ /// starting at the matching offset will be added to the result.
+ ///
+ /// Returns true if any rows were found.
+ bool
+ lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
+ std::vector<uint32_t> &Result,
+ std::optional<uint64_t> StmtSequenceOffset) const;
};
const LineTable *getLineTable(uint64_t Offset) const;
@@ -377,7 +404,15 @@ class DWARFDebugLine {
function_ref<void(Error)> ErrorHandler);
void resetRowAndSequence();
- void appendRowToMatrix();
+
+ /// Append the current Row to the LineTable's matrix of rows and update the
+ /// current Sequence information.
+ ///
+ /// \param LineTableOffset - the offset into the line table where the
+ /// current sequence of rows begins. This offset is stored in the Sequence
+ /// to allow filtering rows based on their originating sequence when a
+ /// DW_AT_LLVM_stmt_sequence attribute is present.
+ void appendRowToMatrix(uint64_t LineTableOffset);
struct AddrOpIndexDelta {
uint64_t AddrOffset;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 939a5163d55abc8..27b4c6a5489127f 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -570,8 +570,9 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() {
Sequence.reset();
}
-void DWARFDebugLine::ParsingState::appendRowToMatrix() {
+void DWARFDebugLine::ParsingState::appendRowToMatrix(uint64_t LineTableOffset) {
unsigned RowNumber = LineTable->Rows.size();
+ Sequence.Offset = LineTableOffset;
if (Sequence.Empty) {
// Record the beginning of instruction sequence.
Sequence.Empty = false;
@@ -848,6 +849,8 @@ Error DWARFDebugLine::LineTable::parse(
*OS << '\n';
Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
}
+ uint64_t LineTableSeqOffset = *OffsetPtr;
+
bool TombstonedAddress = false;
auto EmitRow = [&] {
if (!TombstonedAddress) {
@@ -857,7 +860,7 @@ Error DWARFDebugLine::LineTable::parse(
}
if (OS)
State.Row.dump(*OS);
- State.appendRowToMatrix();
+ State.appendRowToMatrix(LineTableSeqOffset);
}
};
while (*OffsetPtr < EndOffset) {
@@ -913,6 +916,9 @@ Error DWARFDebugLine::LineTable::parse(
// followed.
EmitRow();
State.resetRowAndSequence();
+ // Cursor now points to right after the end_sequence opcode - so points
+ // to the start of the next sequence - if one exists.
+ LineTableSeqOffset = Cursor.tell();
break;
case DW_LNE_set_address:
@@ -1364,10 +1370,18 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,
bool DWARFDebugLine::LineTable::lookupAddressRange(
object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result) const {
+ std::vector<uint32_t> &Result, const DWARFDie *Die) const {
+
+ // If DIE is provided, check for DW_AT_LLVM_stmt_sequence
+ std::optional<uint64_t> StmtSequenceOffset;
+ if (Die) {
+ if (auto StmtSeqAttr = Die->find(dwarf::DW_AT_LLVM_stmt_sequence)) {
+ StmtSequenceOffset = toSectionOffset(StmtSeqAttr);
+ }
+ }
// Search for relocatable addresses
- if (lookupAddressRangeImpl(Address, Size, Result))
+ if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
return true;
if (Address.SectionIndex == object::SectionedAddress::UndefSection)
@@ -1375,12 +1389,13 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
// Search for absolute addresses
Address.SectionIndex = object::SectionedAddress::UndefSection;
- return lookupAddressRangeImpl(Address, Size, Result);
+ return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset);
}
bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result) const {
+ std::vector<uint32_t> &Result,
+ std::optional<uint64_t> StmtSequenceOffset) const {
if (Sequences.empty())
return false;
uint64_t EndAddr = Address.Address + Size;
@@ -1401,6 +1416,14 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
+
+ // Skip sequences that don't match our stmt_sequence offset if one was
+ // provided
+ if (StmtSequenceOffset && CurSeq.Offset != *StmtSequenceOffset) {
+ ++SeqPos;
+ continue;
+ }
+
// For the first sequence, we need to find which row in the sequence is the
// first in our range.
uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
@@ -1423,7 +1446,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
++SeqPos;
}
- return true;
+ return !Result.empty();
}
std::optional<StringRef>
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index e549128031744e6..12ec63d10449f8c 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -6,10 +6,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "DwarfGenerator.h"
#include "DwarfUtils.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
@@ -2035,4 +2035,135 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
EXPECT_THAT(Result.c_str(), MatchesRegex("a dir.b dir.b file"));
}
+/// Test that lookupAddressRange correctly filters rows based on
+/// DW_AT_LLVM_stmt_sequence.
+///
+/// This test verifies that:
+/// 1. When a DIE has a DW_AT_LLVM_stmt_sequence attribute, lookupAddressRange
+/// only returns rows from the sequence starting at the specified offset
+/// 2. When a DIE has an invalid DW_AT_LLVM_stmt_sequence offset, no rows are
+/// returned
+/// 3. When no DW_AT_LLVM_stmt_sequence is present, all matching rows are
+/// returned
+///
+/// The test creates a line table with two sequences at the same address range
+/// but different line numbers. It then creates three subprogram DIEs:
+/// - One with DW_AT_LLVM_stmt_sequence pointing to the first sequence
+/// - One with DW_AT_LLVM_stmt_sequence pointing to the second sequence
+/// - One with an invalid DW_AT_LLVM_stmt_sequence offset
+TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
+ if (!setupGenerator())
+ GTEST_SKIP();
+
+ // Create new DWARF with the subprogram DIE
+ dwarfgen::CompileUnit &CU = Gen->addCompileUnit();
+ dwarfgen::DIE CUDie = CU.getUnitDIE();
+
+ CUDie.addAttribute(DW_AT_name, DW_FORM_strp, "/tmp/main.c");
+ CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);
+
+ dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram);
+ SD1.addAttribute(DW_AT_name, DW_FORM_strp, "sub1");
+ SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+ SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+ // DW_AT_LLVM_stmt_sequence points to the first sequence
+ SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e);
+
+ dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram);
+ SD2.addAttribute(DW_AT_name, DW_FORM_strp, "sub2");
+ SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+ SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+ // DW_AT_LLVM_stmt_sequence points to the second sequence
+ SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42);
+
+ dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram);
+ SD3.addAttribute(DW_AT_name, DW_FORM_strp, "sub3");
+ SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+ SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+ // Invalid DW_AT_LLVM_stmt_sequence
+ SD3.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x66);
+
+ // Create a line table with multiple sequences
+ LineTable < = Gen->addLineTable();
+
+ // First sequence with addresses 0x1000(Ln100), 0x1004(Ln101)
+ LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
+ LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+ LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
+ LT.addStandardOpcode(DW_LNS_copy, {});
+ LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+ LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
+
+ // Second sequence with addresses 0x1000(Ln200), 0x1004(Ln201)
+ LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
+ LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+ LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
+ LT.addStandardOpcode(DW_LNS_copy, {});
+ LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+ LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
+
+ // Generate the DWARF
+ generate();
+
+ // Parse the line table to get the sequence offset
+ auto ExpectedLineTable = Line.getOrParseLineTable(
+ LineData, /*Offset=*/0, *Context, nullptr, RecordRecoverable);
+ ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
+ const auto *Table = *ExpectedLineTable;
+
+ uint32_t NumCUs = Context->getNumCompileUnits();
+ ASSERT_EQ(NumCUs, 1u);
+ DWARFUnit *Unit = Context->getUnitAtIndex(0);
+ auto DwarfCUDie = Unit->getUnitDIE(false);
+
+ auto Sub1Die = DwarfCUDie.getFirstChild();
+ auto Sub2Die = Sub1Die.getSibling();
+ auto Sub3Die = Sub2Die.getSibling();
+
+ // Verify Sub1Die is the DIE generated from SD1
+ auto NameAttr1 = Sub1Die.find(DW_AT_name);
+ EXPECT_STREQ(*dwarf::toString(*NameAttr1), "sub1");
+
+ // Verify Sub2Die is the DIE generated from SD2
+ auto NameAttr2 = Sub2Die.find(DW_AT_name);
+ EXPECT_STREQ(*dwarf::toString(*NameAttr2), "sub2");
+
+ // Verify Sub2Die is the DIE generated from SD3
+ auto NameAttr3 = Sub3Die.find(DW_AT_name);
+ EXPECT_STREQ(*dwarf::toString(*NameAttr3), "sub3");
+
+ // Ensure there are two sequences
+ ASSERT_EQ(Table->Sequences.size(), 2u);
+
+ // Lookup addresses in the first sequence with the second sequence's filter
+ {
+ std::vector<uint32_t> Rows;
+ bool Found;
+
+ // Look up using Sub3Die, which has an invalid DW_AT_LLVM_stmt_sequence
+ Found = Table->lookupAddressRange(
+ {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+ &Sub3Die);
+ EXPECT_FALSE(Found);
+
+ // Look up using Sub1Die, which has a valid DW_AT_LLVM_stmt_sequence and
+ // should return row 0
+ Found = Table->lookupAddressRange(
+ {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+ &Sub1Die);
+ EXPECT_TRUE(Found);
+ ASSERT_EQ(Rows.size(), 1u);
+ EXPECT_EQ(Rows[0], 0);
+
+ // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and
+ // should return row 3
+ Rows.clear();
+ Found = Table->lookupAddressRange(
+ {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+ &Sub2Die);
+ EXPECT_TRUE(Found);
+ ASSERT_EQ(Rows.size(), 1u);
+ EXPECT_EQ(Rows[0], 3);
+ }
+}
} // end anonymous namespace
>From ad1d1d0a606f35466c8fdf74a00d7363d2a50442 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Tue, 21 Jan 2025 15:39:36 -0800
Subject: [PATCH 2/8] Address Comments #1
---
.../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 16 +++++----------
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 20 +++++++++++--------
2 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 732a4bfc28ab3a3..734a00801457bd6 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -210,7 +210,7 @@ class DWARFDebugLine {
bool Empty;
/// The offset into the line table where this sequence begins
- uint64_t Offset = 0;
+ uint64_t StmtSeqOffset = UINT64_MAX;
void reset();
@@ -227,6 +227,8 @@ class DWARFDebugLine {
return SectionIndex == PC.SectionIndex &&
(LowPC <= PC.Address && PC.Address < HighPC);
}
+
+ void SetSequenceOffset(uint64_t Offset) { StmtSeqOffset = Offset; }
};
struct LineTable {
@@ -403,16 +405,8 @@ class DWARFDebugLine {
ParsingState(struct LineTable *LT, uint64_t TableOffset,
function_ref<void(Error)> ErrorHandler);
- void resetRowAndSequence();
-
- /// Append the current Row to the LineTable's matrix of rows and update the
- /// current Sequence information.
- ///
- /// \param LineTableOffset - the offset into the line table where the
- /// current sequence of rows begins. This offset is stored in the Sequence
- /// to allow filtering rows based on their originating sequence when a
- /// DW_AT_LLVM_stmt_sequence attribute is present.
- void appendRowToMatrix(uint64_t LineTableOffset);
+ void resetRowAndSequence(uint64_t Offset = UINT64_MAX);
+ void appendRowToMatrix();
struct AddrOpIndexDelta {
uint64_t AddrOffset;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 27b4c6a5489127f..dd91ef04eca83f9 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -531,6 +531,7 @@ void DWARFDebugLine::Sequence::reset() {
FirstRowIndex = 0;
LastRowIndex = 0;
Empty = true;
+ StmtSeqOffset = UINT64_MAX;
}
DWARFDebugLine::LineTable::LineTable() { clear(); }
@@ -565,14 +566,16 @@ DWARFDebugLine::ParsingState::ParsingState(
resetRowAndSequence();
}
-void DWARFDebugLine::ParsingState::resetRowAndSequence() {
+void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
Row.reset(LineTable->Prologue.DefaultIsStmt);
Sequence.reset();
+ if (Offset != UINT64_MAX) {
+ Sequence.SetSequenceOffset(Offset);
+ }
}
-void DWARFDebugLine::ParsingState::appendRowToMatrix(uint64_t LineTableOffset) {
+void DWARFDebugLine::ParsingState::appendRowToMatrix() {
unsigned RowNumber = LineTable->Rows.size();
- Sequence.Offset = LineTableOffset;
if (Sequence.Empty) {
// Record the beginning of instruction sequence.
Sequence.Empty = false;
@@ -849,7 +852,9 @@ Error DWARFDebugLine::LineTable::parse(
*OS << '\n';
Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
}
- uint64_t LineTableSeqOffset = *OffsetPtr;
+ // *OffsetPtr points to the end of the prologue - i.e. the start of the first
+ // sequence. So initialize the first sequence offset accordingly.
+ State.Sequence.SetSequenceOffset(*OffsetPtr);
bool TombstonedAddress = false;
auto EmitRow = [&] {
@@ -860,7 +865,7 @@ Error DWARFDebugLine::LineTable::parse(
}
if (OS)
State.Row.dump(*OS);
- State.appendRowToMatrix(LineTableSeqOffset);
+ State.appendRowToMatrix();
}
};
while (*OffsetPtr < EndOffset) {
@@ -915,10 +920,9 @@ Error DWARFDebugLine::LineTable::parse(
// into this code path - if it were invalid, the default case would be
// followed.
EmitRow();
- State.resetRowAndSequence();
// Cursor now points to right after the end_sequence opcode - so points
// to the start of the next sequence - if one exists.
- LineTableSeqOffset = Cursor.tell();
+ State.resetRowAndSequence(Cursor.tell());
break;
case DW_LNE_set_address:
@@ -1419,7 +1423,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
// Skip sequences that don't match our stmt_sequence offset if one was
// provided
- if (StmtSequenceOffset && CurSeq.Offset != *StmtSequenceOffset) {
+ if (StmtSequenceOffset && CurSeq.StmtSeqOffset != *StmtSequenceOffset) {
++SeqPos;
continue;
}
>From cc8df97c0619b1456a371e132642f1c08e7a7dba Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Tue, 21 Jan 2025 17:58:51 -0800
Subject: [PATCH 3/8] Fix failing test
---
.../unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index 12ec63d10449f8c..002e105daeb9234 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -2059,25 +2059,25 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
dwarfgen::CompileUnit &CU = Gen->addCompileUnit();
dwarfgen::DIE CUDie = CU.getUnitDIE();
- CUDie.addAttribute(DW_AT_name, DW_FORM_strp, "/tmp/main.c");
+ CUDie.addAttribute(DW_AT_name, DW_FORM_string, "/tmp/main.c");
CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);
dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram);
- SD1.addAttribute(DW_AT_name, DW_FORM_strp, "sub1");
+ SD1.addAttribute(DW_AT_name, DW_FORM_string, "sub1");
SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
// DW_AT_LLVM_stmt_sequence points to the first sequence
SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e);
dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram);
- SD2.addAttribute(DW_AT_name, DW_FORM_strp, "sub2");
+ SD2.addAttribute(DW_AT_name, DW_FORM_string, "sub2");
SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
// DW_AT_LLVM_stmt_sequence points to the second sequence
SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42);
dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram);
- SD3.addAttribute(DW_AT_name, DW_FORM_strp, "sub3");
+ SD3.addAttribute(DW_AT_name, DW_FORM_string, "sub3");
SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
// Invalid DW_AT_LLVM_stmt_sequence
@@ -2153,7 +2153,7 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
&Sub1Die);
EXPECT_TRUE(Found);
ASSERT_EQ(Rows.size(), 1u);
- EXPECT_EQ(Rows[0], 0);
+ EXPECT_EQ(Rows[0], 0U);
// Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and
// should return row 3
@@ -2163,7 +2163,7 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
&Sub2Die);
EXPECT_TRUE(Found);
ASSERT_EQ(Rows.size(), 1u);
- EXPECT_EQ(Rows[0], 3);
+ EXPECT_EQ(Rows[0], 3u);
}
}
} // end anonymous namespace
>From 88a816f2971d4c90a00a8a06cdf7bc77d539b569 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Thu, 23 Jan 2025 15:19:01 -0800
Subject: [PATCH 4/8] Dont pass DIE, pass offset
---
.../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 12 +++++------
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 11 ++--------
.../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 20 +++++++++++--------
3 files changed, 20 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 734a00801457bd6..5694cbdde6fdbe4 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -254,14 +254,14 @@ class DWARFDebugLine {
/// \param Address - The starting address of the range.
/// \param Size - The size of the address range.
/// \param Result - The vector to fill with row indices.
- /// \param Die - if provided, the function will check for a
- /// DW_AT_LLVM_stmt_sequence attribute. If present, only rows from the
- /// sequence starting at the matching offset will be added to the result.
+ /// \param StmtSequenceOffset - if provided, only rows from the sequence
+ /// starting at the matching offset will be added to the result.
///
/// Returns true if any rows were found.
- bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result,
- const DWARFDie *Die = nullptr) const;
+ bool lookupAddressRange(
+ object::SectionedAddress Address, uint64_t Size,
+ std::vector<uint32_t> &Result,
+ std::optional<uint64_t> StmtSequenceOffset = std::nullopt) const;
bool hasFileAtIndex(uint64_t FileIndex) const {
return Prologue.hasFileAtIndex(FileIndex);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index dd91ef04eca83f9..eb5181276a37a44 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1374,15 +1374,8 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,
bool DWARFDebugLine::LineTable::lookupAddressRange(
object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result, const DWARFDie *Die) const {
-
- // If DIE is provided, check for DW_AT_LLVM_stmt_sequence
- std::optional<uint64_t> StmtSequenceOffset;
- if (Die) {
- if (auto StmtSeqAttr = Die->find(dwarf::DW_AT_LLVM_stmt_sequence)) {
- StmtSequenceOffset = toSectionOffset(StmtSeqAttr);
- }
- }
+ std::vector<uint32_t> &Result,
+ std::optional<uint64_t> StmtSequenceOffset) const {
// Search for relocatable addresses
if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index 002e105daeb9234..4c0116ad32b1fd3 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -2140,27 +2140,31 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
std::vector<uint32_t> Rows;
bool Found;
- // Look up using Sub3Die, which has an invalid DW_AT_LLVM_stmt_sequence
+ // Look up using Sub3Die's invalid stmt_sequence offset
+ auto StmtSeqAttr3 = Sub3Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
+ ASSERT_TRUE(StmtSeqAttr3);
Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
- &Sub3Die);
+ toSectionOffset(StmtSeqAttr3));
EXPECT_FALSE(Found);
- // Look up using Sub1Die, which has a valid DW_AT_LLVM_stmt_sequence and
- // should return row 0
+ // Look up using Sub1Die's valid stmt_sequence offset
+ auto StmtSeqAttr1 = Sub1Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
+ ASSERT_TRUE(StmtSeqAttr1);
Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
- &Sub1Die);
+ toSectionOffset(StmtSeqAttr1));
EXPECT_TRUE(Found);
ASSERT_EQ(Rows.size(), 1u);
EXPECT_EQ(Rows[0], 0U);
- // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and
- // should return row 3
+ // Look up using Sub2Die's valid stmt_sequence offset
Rows.clear();
+ auto StmtSeqAttr2 = Sub2Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
+ ASSERT_TRUE(StmtSeqAttr2);
Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
- &Sub2Die);
+ toSectionOffset(StmtSeqAttr2));
EXPECT_TRUE(Found);
ASSERT_EQ(Rows.size(), 1u);
EXPECT_EQ(Rows[0], 3u);
>From 7aa8caeec4133979313ddbc34c4f1a87aa0df81d Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Wed, 29 Jan 2025 05:28:23 -0800
Subject: [PATCH 5/8] Address feedback nr.2
---
llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 2 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 10 +++-------
2 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 5694cbdde6fdbe4..b1bfc139119afe5 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -405,7 +405,7 @@ class DWARFDebugLine {
ParsingState(struct LineTable *LT, uint64_t TableOffset,
function_ref<void(Error)> ErrorHandler);
- void resetRowAndSequence(uint64_t Offset = UINT64_MAX);
+ void resetRowAndSequence(uint64_t Offset);
void appendRowToMatrix();
struct AddrOpIndexDelta {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index eb5181276a37a44..ebc1e28fa0294b2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -562,16 +562,12 @@ void DWARFDebugLine::LineTable::clear() {
DWARFDebugLine::ParsingState::ParsingState(
struct LineTable *LT, uint64_t TableOffset,
function_ref<void(Error)> ErrorHandler)
- : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {
- resetRowAndSequence();
-}
+ : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {}
void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
Row.reset(LineTable->Prologue.DefaultIsStmt);
Sequence.reset();
- if (Offset != UINT64_MAX) {
- Sequence.SetSequenceOffset(Offset);
- }
+ Sequence.SetSequenceOffset(Offset);
}
void DWARFDebugLine::ParsingState::appendRowToMatrix() {
@@ -854,7 +850,7 @@ Error DWARFDebugLine::LineTable::parse(
}
// *OffsetPtr points to the end of the prologue - i.e. the start of the first
// sequence. So initialize the first sequence offset accordingly.
- State.Sequence.SetSequenceOffset(*OffsetPtr);
+ State.resetRowAndSequence(*OffsetPtr);
bool TombstonedAddress = false;
auto EmitRow = [&] {
>From 16e8e204ac986a45f7778e624e05e19e44224098 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Tue, 4 Feb 2025 09:07:55 -0800
Subject: [PATCH 6/8] Address Feedback Nr.3
---
.../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 2 -
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 99 +++++++----
.../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 160 ++++++++----------
3 files changed, 136 insertions(+), 125 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index b1bfc139119afe5..61f17a27b3d286c 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -227,8 +227,6 @@ class DWARFDebugLine {
return SectionIndex == PC.SectionIndex &&
(LowPC <= PC.Address && PC.Address < HighPC);
}
-
- void SetSequenceOffset(uint64_t Offset) { StmtSeqOffset = Offset; }
};
struct LineTable {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index ebc1e28fa0294b2..606635da2fec6dd 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -567,7 +567,7 @@ DWARFDebugLine::ParsingState::ParsingState(
void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
Row.reset(LineTable->Prologue.DefaultIsStmt);
Sequence.reset();
- Sequence.SetSequenceOffset(Offset);
+ Sequence.StmtSeqOffset = Offset;
}
void DWARFDebugLine::ParsingState::appendRowToMatrix() {
@@ -1391,51 +1391,86 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
std::optional<uint64_t> StmtSequenceOffset) const {
if (Sequences.empty())
return false;
- uint64_t EndAddr = Address.Address + Size;
- // First, find an instruction sequence containing the given address.
- DWARFDebugLine::Sequence Sequence;
- Sequence.SectionIndex = Address.SectionIndex;
- Sequence.HighPC = Address.Address;
- SequenceIter LastSeq = Sequences.end();
- SequenceIter SeqPos = llvm::upper_bound(
- Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC);
- if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
- return false;
- SequenceIter StartPos = SeqPos;
+ const uint64_t EndAddr = Address.Address + Size;
- // Add the rows from the first sequence to the vector, starting with the
- // index we just calculated
-
- while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
- const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
-
- // Skip sequences that don't match our stmt_sequence offset if one was
- // provided
- if (StmtSequenceOffset && CurSeq.StmtSeqOffset != *StmtSequenceOffset) {
- ++SeqPos;
- continue;
- }
+ // Helper: Given a sequence and a starting address, find the subset
+ // of rows within [Address, EndAddr) and append them to `Result`.
+ auto addRowsFromSequence = [&](const Sequence &Seq,
+ object::SectionedAddress StartAddr,
+ uint64_t EndAddress, bool IsFirstSequence) {
+ // If this sequence does not intersect our [StartAddr, EndAddress) range,
+ // do nothing.
+ if (Seq.HighPC <= StartAddr.Address || Seq.LowPC >= EndAddress)
+ return;
- // For the first sequence, we need to find which row in the sequence is the
- // first in our range.
- uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
- if (SeqPos == StartPos)
- FirstRowIndex = findRowInSeq(CurSeq, Address);
+ // For the "first" sequence in the search, we must figure out which row
+ // actually starts within our address range.
+ uint32_t FirstRowIndex = Seq.FirstRowIndex;
+ if (IsFirstSequence)
+ FirstRowIndex = findRowInSeq(Seq, StartAddr);
- // Figure out the last row in the range.
+ // Similarly, compute the last row that is within [StartAddr, EndAddress).
uint32_t LastRowIndex =
- findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex});
+ findRowInSeq(Seq, {EndAddress - 1, StartAddr.SectionIndex});
if (LastRowIndex == UnknownRowIndex)
- LastRowIndex = CurSeq.LastRowIndex - 1;
+ LastRowIndex = Seq.LastRowIndex - 1;
assert(FirstRowIndex != UnknownRowIndex);
assert(LastRowIndex != UnknownRowIndex);
+ // Append all row indices in [FirstRowIndex, LastRowIndex].
for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) {
Result.push_back(I);
}
+ };
+
+ // If a stmt_sequence_offset was provided, do a binary search to find the
+ // single sequence that matches that offset. Then add only its relevant rows.
+ if (StmtSequenceOffset) {
+ // Binary-search the Sequences by their StmtSeqOffset:
+ auto It = llvm::lower_bound(Sequences, *StmtSequenceOffset,
+ [](const Sequence &Seq, uint64_t OffsetVal) {
+ return Seq.StmtSeqOffset < OffsetVal;
+ });
+
+ // If not found or mismatched, there’s no match to return.
+ if (It == Sequences.end() || It->StmtSeqOffset != *StmtSequenceOffset)
+ return false;
+
+ // Now add rows from the discovered sequence if it intersects [Address,
+ // EndAddr).
+ addRowsFromSequence(*It, Address, EndAddr, /*IsFirstSequence=*/true);
+ return !Result.empty();
+ }
+ // Otherwise, fall back to logic of walking sequences by Address.
+ // We first find a sequence containing `Address` (via upper_bound on HighPC),
+ // then proceed forward through overlapping sequences in ascending order.
+
+ // Construct a dummy Sequence to find where `Address` fits by HighPC.
+ DWARFDebugLine::Sequence SearchSeq;
+ SearchSeq.SectionIndex = Address.SectionIndex;
+ SearchSeq.HighPC = Address.Address;
+
+ auto LastSeq = Sequences.end();
+ auto SeqPos = llvm::upper_bound(Sequences, SearchSeq,
+ DWARFDebugLine::Sequence::orderByHighPC);
+ if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
+ return false;
+
+ // This marks the first sequence we found that might contain Address.
+ const auto StartPos = SeqPos;
+
+ // Walk forward until sequences no longer intersect [Address, EndAddr).
+ while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
+ // Add rows only if the sequence is in the same section:
+ if (SeqPos->SectionIndex == Address.SectionIndex) {
+ // For the very first sequence, we need the row that lines up with
+ // `Address`.
+ bool IsFirst = (SeqPos == StartPos);
+ addRowsFromSequence(*SeqPos, Address, EndAddr, IsFirst);
+ }
++SeqPos;
}
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index 4c0116ad32b1fd3..2fe52600df923bf 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -2036,138 +2036,116 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
}
/// Test that lookupAddressRange correctly filters rows based on
-/// DW_AT_LLVM_stmt_sequence.
+/// a statement-sequence offset (simulating DW_AT_LLVM_stmt_sequence).
///
/// This test verifies that:
-/// 1. When a DIE has a DW_AT_LLVM_stmt_sequence attribute, lookupAddressRange
-/// only returns rows from the sequence starting at the specified offset
-/// 2. When a DIE has an invalid DW_AT_LLVM_stmt_sequence offset, no rows are
-/// returned
-/// 3. When no DW_AT_LLVM_stmt_sequence is present, all matching rows are
-/// returned
+/// 1. When a statement-sequence offset is provided, lookupAddressRange
+/// only returns rows from the sequence starting at that offset.
+/// 2. When an invalid statement-sequence offset is provided, no rows
+/// are returned.
+/// 3. When no statement-sequence offset is provided, all matching rows
+/// in the table are returned.
///
-/// The test creates a line table with two sequences at the same address range
-/// but different line numbers. It then creates three subprogram DIEs:
-/// - One with DW_AT_LLVM_stmt_sequence pointing to the first sequence
-/// - One with DW_AT_LLVM_stmt_sequence pointing to the second sequence
-/// - One with an invalid DW_AT_LLVM_stmt_sequence offset
+/// We build a line table with two sequences at the same address range
+/// but different line numbers. Then we try lookups with various statement-
+/// sequence offsets to check the filtering logic.
TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
if (!setupGenerator())
GTEST_SKIP();
- // Create new DWARF with the subprogram DIE
- dwarfgen::CompileUnit &CU = Gen->addCompileUnit();
- dwarfgen::DIE CUDie = CU.getUnitDIE();
-
- CUDie.addAttribute(DW_AT_name, DW_FORM_string, "/tmp/main.c");
- CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);
-
- dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram);
- SD1.addAttribute(DW_AT_name, DW_FORM_string, "sub1");
- SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
- SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
- // DW_AT_LLVM_stmt_sequence points to the first sequence
- SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e);
-
- dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram);
- SD2.addAttribute(DW_AT_name, DW_FORM_string, "sub2");
- SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
- SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
- // DW_AT_LLVM_stmt_sequence points to the second sequence
- SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42);
-
- dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram);
- SD3.addAttribute(DW_AT_name, DW_FORM_string, "sub3");
- SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
- SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
- // Invalid DW_AT_LLVM_stmt_sequence
- SD3.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x66);
-
- // Create a line table with multiple sequences
+ // Create a line table that has two sequences covering [0x1000, 0x1004).
+ // Each sequence has two rows: addresses at 0x1000 and 0x1004, but
+ // they differ by line numbers (100 vs. 200, etc.).
+ //
+ // We'll pretend the first sequence starts at offset 0x2e in the line table,
+ // the second at 0x42, and we'll also test an invalid offset 0x66.
+
LineTable < = Gen->addLineTable();
- // First sequence with addresses 0x1000(Ln100), 0x1004(Ln101)
+ // First sequence at offset 0x2e: addresses 0x1000(Ln=100), 0x1004(Ln=101)
LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+ // Advance the line register by 99 (so line=100) and copy.
LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
LT.addStandardOpcode(DW_LNS_copy, {});
- LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+ // 0x4b is a special opcode: address += 4, line += 1 (so line=101).
+ LT.addByte(0x4b);
+ // End this sequence.
LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
- // Second sequence with addresses 0x1000(Ln200), 0x1004(Ln201)
+ // Second sequence at offset 0x42: addresses 0x1000(Ln=200), 0x1004(Ln=201)
LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
LT.addStandardOpcode(DW_LNS_copy, {});
- LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+ // 0x4b again: address += 4, line += 1 (so line=201).
+ LT.addByte(0x4b);
+ // End this second sequence.
LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
- // Generate the DWARF
+ // Generate the DWARF data.
generate();
- // Parse the line table to get the sequence offset
- auto ExpectedLineTable = Line.getOrParseLineTable(
- LineData, /*Offset=*/0, *Context, nullptr, RecordRecoverable);
+ // Parse the line table.
+ auto ExpectedLineTable =
+ Line.getOrParseLineTable(LineData, /*Offset=*/0, *Context,
+ /*DwarfUnit=*/nullptr, RecordRecoverable);
ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
const auto *Table = *ExpectedLineTable;
- uint32_t NumCUs = Context->getNumCompileUnits();
- ASSERT_EQ(NumCUs, 1u);
- DWARFUnit *Unit = Context->getUnitAtIndex(0);
- auto DwarfCUDie = Unit->getUnitDIE(false);
-
- auto Sub1Die = DwarfCUDie.getFirstChild();
- auto Sub2Die = Sub1Die.getSibling();
- auto Sub3Die = Sub2Die.getSibling();
-
- // Verify Sub1Die is the DIE generated from SD1
- auto NameAttr1 = Sub1Die.find(DW_AT_name);
- EXPECT_STREQ(*dwarf::toString(*NameAttr1), "sub1");
-
- // Verify Sub2Die is the DIE generated from SD2
- auto NameAttr2 = Sub2Die.find(DW_AT_name);
- EXPECT_STREQ(*dwarf::toString(*NameAttr2), "sub2");
-
- // Verify Sub2Die is the DIE generated from SD3
- auto NameAttr3 = Sub3Die.find(DW_AT_name);
- EXPECT_STREQ(*dwarf::toString(*NameAttr3), "sub3");
-
- // Ensure there are two sequences
+ // The table should have two sequences, each starting at our chosen offsets.
ASSERT_EQ(Table->Sequences.size(), 2u);
- // Lookup addresses in the first sequence with the second sequence's filter
+ // 1) Try looking up with an invalid offset (simulating an invalid
+ // DW_AT_LLVM_stmt_sequence). We expect no rows.
{
std::vector<uint32_t> Rows;
- bool Found;
-
- // Look up using Sub3Die's invalid stmt_sequence offset
- auto StmtSeqAttr3 = Sub3Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
- ASSERT_TRUE(StmtSeqAttr3);
- Found = Table->lookupAddressRange(
+ bool Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
- toSectionOffset(StmtSeqAttr3));
+ /*StmtSequenceOffset=*/0x66); // invalid offset
EXPECT_FALSE(Found);
+ EXPECT_TRUE(Rows.empty());
+ }
- // Look up using Sub1Die's valid stmt_sequence offset
- auto StmtSeqAttr1 = Sub1Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
- ASSERT_TRUE(StmtSeqAttr1);
- Found = Table->lookupAddressRange(
+ // 2) Look up using the offset 0x2e (our first sequence). We expect
+ // to get the rows from that sequence only (which for 0x1000 is row #0).
+ {
+ std::vector<uint32_t> Rows;
+ bool Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
- toSectionOffset(StmtSeqAttr1));
+ /*StmtSequenceOffset=*/0x2e);
EXPECT_TRUE(Found);
ASSERT_EQ(Rows.size(), 1u);
- EXPECT_EQ(Rows[0], 0U);
+ // The first sequence's first row is index 0.
+ EXPECT_EQ(Rows[0], 0u);
+ }
- // Look up using Sub2Die's valid stmt_sequence offset
- Rows.clear();
- auto StmtSeqAttr2 = Sub2Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
- ASSERT_TRUE(StmtSeqAttr2);
- Found = Table->lookupAddressRange(
+ // 3) Look up using the offset 0x42 (second sequence). For address 0x1000
+ // in that second sequence, we should see row #2.
+ {
+ std::vector<uint32_t> Rows;
+ bool Found = Table->lookupAddressRange(
{0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
- toSectionOffset(StmtSeqAttr2));
+ /*StmtSequenceOffset=*/0x42);
EXPECT_TRUE(Found);
ASSERT_EQ(Rows.size(), 1u);
+ // The second sequence's first row is index 2 in the table.
EXPECT_EQ(Rows[0], 3u);
}
+
+ // 4) Look up with no statement-sequence offset specified.
+ // We should get rows from both sequences for address 0x1000.
+ {
+ std::vector<uint32_t> Rows;
+ bool Found = Table->lookupAddressRange(
+ {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+ std::nullopt /* no filter */);
+ EXPECT_TRUE(Found);
+ // The first sequence's row is #0, second's row is #2, so both should
+ // appear.
+ ASSERT_EQ(Rows.size(), 2u);
+ EXPECT_EQ(Rows[0], 0u);
+ EXPECT_EQ(Rows[1], 3u);
+ }
}
} // end anonymous namespace
>From 569bbe4b08b4a673fca6b26b1eb6a624f22bf080 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Wed, 5 Feb 2025 16:03:17 -0800
Subject: [PATCH 7/8] Address Feedback Nr.4
---
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 111 ++++++++------------
1 file changed, 44 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 606635da2fec6dd..267958a8602ff04 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1391,90 +1391,67 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
std::optional<uint64_t> StmtSequenceOffset) const {
if (Sequences.empty())
return false;
+ uint64_t EndAddr = Address.Address + Size;
+ // First, find an instruction sequence containing the given address.
+ DWARFDebugLine::Sequence Sequence;
+ Sequence.SectionIndex = Address.SectionIndex;
+ Sequence.HighPC = Address.Address;
+ SequenceIter LastSeq = Sequences.end();
+ SequenceIter SeqPos;
- const uint64_t EndAddr = Address.Address + Size;
+ if (StmtSequenceOffset) {
+ // If we have a statement sequence offset, find the specific sequence
+ // Binary search for sequence with matching StmtSeqOffset
+ Sequence.StmtSeqOffset = *StmtSequenceOffset;
+ SeqPos = std::lower_bound(Sequences.begin(), LastSeq, Sequence,
+ [](const DWARFDebugLine::Sequence &LHS,
+ const DWARFDebugLine::Sequence &RHS) {
+ return LHS.StmtSeqOffset < RHS.StmtSeqOffset;
+ });
+
+ // If sequence not found or doesn't contain the address, return false
+ if (SeqPos == LastSeq || SeqPos->StmtSeqOffset != *StmtSequenceOffset ||
+ !SeqPos->containsPC(Address))
+ return false;
- // Helper: Given a sequence and a starting address, find the subset
- // of rows within [Address, EndAddr) and append them to `Result`.
- auto addRowsFromSequence = [&](const Sequence &Seq,
- object::SectionedAddress StartAddr,
- uint64_t EndAddress, bool IsFirstSequence) {
- // If this sequence does not intersect our [StartAddr, EndAddress) range,
- // do nothing.
- if (Seq.HighPC <= StartAddr.Address || Seq.LowPC >= EndAddress)
- return;
+ // Set LastSeq to just past this sequence since we only want this one
+ LastSeq = std::next(SeqPos);
+ } else {
+ // No specific sequence requested, find first sequence containing address
+ SeqPos = std::upper_bound(Sequences.begin(), LastSeq, Sequence,
+ DWARFDebugLine::Sequence::orderByHighPC);
+ if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
+ return false;
+ }
- // For the "first" sequence in the search, we must figure out which row
- // actually starts within our address range.
- uint32_t FirstRowIndex = Seq.FirstRowIndex;
- if (IsFirstSequence)
- FirstRowIndex = findRowInSeq(Seq, StartAddr);
+ SequenceIter StartPos = SeqPos;
- // Similarly, compute the last row that is within [StartAddr, EndAddress).
+ // Process sequences that overlap with the desired range
+ while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
+ const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
+ // For the first sequence, we need to find which row in the sequence is the
+ // first in our range.
+ uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
+ if (SeqPos == StartPos)
+ FirstRowIndex = findRowInSeq(CurSeq, Address);
+
+ // Figure out the last row in the range.
uint32_t LastRowIndex =
- findRowInSeq(Seq, {EndAddress - 1, StartAddr.SectionIndex});
+ findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex});
if (LastRowIndex == UnknownRowIndex)
- LastRowIndex = Seq.LastRowIndex - 1;
+ LastRowIndex = CurSeq.LastRowIndex - 1;
assert(FirstRowIndex != UnknownRowIndex);
assert(LastRowIndex != UnknownRowIndex);
- // Append all row indices in [FirstRowIndex, LastRowIndex].
for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) {
Result.push_back(I);
}
- };
- // If a stmt_sequence_offset was provided, do a binary search to find the
- // single sequence that matches that offset. Then add only its relevant rows.
- if (StmtSequenceOffset) {
- // Binary-search the Sequences by their StmtSeqOffset:
- auto It = llvm::lower_bound(Sequences, *StmtSequenceOffset,
- [](const Sequence &Seq, uint64_t OffsetVal) {
- return Seq.StmtSeqOffset < OffsetVal;
- });
-
- // If not found or mismatched, there’s no match to return.
- if (It == Sequences.end() || It->StmtSeqOffset != *StmtSequenceOffset)
- return false;
-
- // Now add rows from the discovered sequence if it intersects [Address,
- // EndAddr).
- addRowsFromSequence(*It, Address, EndAddr, /*IsFirstSequence=*/true);
- return !Result.empty();
- }
-
- // Otherwise, fall back to logic of walking sequences by Address.
- // We first find a sequence containing `Address` (via upper_bound on HighPC),
- // then proceed forward through overlapping sequences in ascending order.
-
- // Construct a dummy Sequence to find where `Address` fits by HighPC.
- DWARFDebugLine::Sequence SearchSeq;
- SearchSeq.SectionIndex = Address.SectionIndex;
- SearchSeq.HighPC = Address.Address;
-
- auto LastSeq = Sequences.end();
- auto SeqPos = llvm::upper_bound(Sequences, SearchSeq,
- DWARFDebugLine::Sequence::orderByHighPC);
- if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
- return false;
-
- // This marks the first sequence we found that might contain Address.
- const auto StartPos = SeqPos;
-
- // Walk forward until sequences no longer intersect [Address, EndAddr).
- while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
- // Add rows only if the sequence is in the same section:
- if (SeqPos->SectionIndex == Address.SectionIndex) {
- // For the very first sequence, we need the row that lines up with
- // `Address`.
- bool IsFirst = (SeqPos == StartPos);
- addRowsFromSequence(*SeqPos, Address, EndAddr, IsFirst);
- }
++SeqPos;
}
- return !Result.empty();
+ return true;
}
std::optional<StringRef>
>From 539ab4cd67db9a5239478379a683df894a79d832 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Wed, 5 Feb 2025 17:28:29 -0800
Subject: [PATCH 8/8] Address Feedback Nr.5
---
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 32 +++++++++++----------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 267958a8602ff04..547a288a82d25df 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1400,30 +1400,32 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
SequenceIter SeqPos;
if (StmtSequenceOffset) {
- // If we have a statement sequence offset, find the specific sequence
- // Binary search for sequence with matching StmtSeqOffset
- Sequence.StmtSeqOffset = *StmtSequenceOffset;
- SeqPos = std::lower_bound(Sequences.begin(), LastSeq, Sequence,
- [](const DWARFDebugLine::Sequence &LHS,
- const DWARFDebugLine::Sequence &RHS) {
- return LHS.StmtSeqOffset < RHS.StmtSeqOffset;
- });
-
- // If sequence not found or doesn't contain the address, return false
- if (SeqPos == LastSeq || SeqPos->StmtSeqOffset != *StmtSequenceOffset ||
- !SeqPos->containsPC(Address))
+ // If we have a statement sequence offset, find the specific sequence.
+ // Linear search for sequence with matching StmtSeqOffset
+ SeqPos = std::find_if(Sequences.begin(), LastSeq,
+ [&](const DWARFDebugLine::Sequence &S) {
+ return S.StmtSeqOffset == *StmtSequenceOffset;
+ });
+
+ // If sequence not found, return false
+ if (SeqPos == LastSeq)
return false;
- // Set LastSeq to just past this sequence since we only want this one
- LastSeq = std::next(SeqPos);
+ // Set LastSeq to the next sequence since we only want the one matching
+ // sequence (sequences are guaranteed to have unique StmtSeqOffset)
+ LastSeq = SeqPos + 1;
} else {
// No specific sequence requested, find first sequence containing address
SeqPos = std::upper_bound(Sequences.begin(), LastSeq, Sequence,
DWARFDebugLine::Sequence::orderByHighPC);
- if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
+ if (SeqPos == LastSeq)
return false;
}
+ // If the start sequence doesn't contain the address, nothing to do
+ if (!SeqPos->containsPC(Address))
+ return false;
+
SequenceIter StartPos = SeqPos;
// Process sequences that overlap with the desired range
More information about the llvm-commits
mailing list