[llvm] [DebugInfo][DWARF] Utilize DW_AT_LLVM_stmt_sequence attr in line table lookups (PR #123391)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 17 11:58:40 PST 2025
https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/123391
>From 82a6fb89d00856196f534383115afe08b42fe417 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Fri, 17 Jan 2025 11:33:20 -0800
Subject: [PATCH] [DebugInfo] Add option to use DW_AT_LLVM_stmt_sequence in
line table lookups
---
.../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 43 +++++-
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 37 ++++-
.../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 130 +++++++++++++++++-
3 files changed, 198 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index ff7bf87d8e6b5a..732a4bfc28ab3a 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -209,6 +209,9 @@ class DWARFDebugLine {
unsigned LastRowIndex;
bool Empty;
+ /// The offset into the line table where this sequence begins
+ uint64_t Offset = 0;
+
void reset();
static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
@@ -243,8 +246,20 @@ class DWARFDebugLine {
uint32_t lookupAddress(object::SectionedAddress Address,
bool *IsApproximateLine = nullptr) const;
+ /// Fills the Result argument with the indices of the rows that correspond
+ /// to the address range specified by \p Address and \p Size.
+ ///
+ /// \param Address - The starting address of the range.
+ /// \param Size - The size of the address range.
+ /// \param Result - The vector to fill with row indices.
+ /// \param Die - if provided, the function will check for a
+ /// DW_AT_LLVM_stmt_sequence attribute. If present, only rows from the
+ /// sequence starting at the matching offset will be added to the result.
+ ///
+ /// Returns true if any rows were found.
bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result) const;
+ std::vector<uint32_t> &Result,
+ const DWARFDie *Die = nullptr) const;
bool hasFileAtIndex(uint64_t FileIndex) const {
return Prologue.hasFileAtIndex(FileIndex);
@@ -305,8 +320,20 @@ class DWARFDebugLine {
uint32_t lookupAddressImpl(object::SectionedAddress Address,
bool *IsApproximateLine = nullptr) const;
- bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result) const;
+ /// Fills the Result argument with the indices of the rows that correspond
+ /// to the address range specified by \p Address and \p Size.
+ ///
+ /// \param Address - The starting address of the range.
+ /// \param Size - The size of the address range.
+ /// \param Result - The vector to fill with row indices.
+ /// \param StmtSequenceOffset - if provided, only rows from the sequence
+ /// starting at the matching offset will be added to the result.
+ ///
+ /// Returns true if any rows were found.
+ bool
+ lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
+ std::vector<uint32_t> &Result,
+ std::optional<uint64_t> StmtSequenceOffset) const;
};
const LineTable *getLineTable(uint64_t Offset) const;
@@ -377,7 +404,15 @@ class DWARFDebugLine {
function_ref<void(Error)> ErrorHandler);
void resetRowAndSequence();
- void appendRowToMatrix();
+
+ /// Append the current Row to the LineTable's matrix of rows and update the
+ /// current Sequence information.
+ ///
+ /// \param LineTableOffset - the offset into the line table where the
+ /// current sequence of rows begins. This offset is stored in the Sequence
+ /// to allow filtering rows based on their originating sequence when a
+ /// DW_AT_LLVM_stmt_sequence attribute is present.
+ void appendRowToMatrix(uint64_t LineTableOffset);
struct AddrOpIndexDelta {
uint64_t AddrOffset;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 939a5163d55abc..27b4c6a5489127 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -570,8 +570,9 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() {
Sequence.reset();
}
-void DWARFDebugLine::ParsingState::appendRowToMatrix() {
+void DWARFDebugLine::ParsingState::appendRowToMatrix(uint64_t LineTableOffset) {
unsigned RowNumber = LineTable->Rows.size();
+ Sequence.Offset = LineTableOffset;
if (Sequence.Empty) {
// Record the beginning of instruction sequence.
Sequence.Empty = false;
@@ -848,6 +849,8 @@ Error DWARFDebugLine::LineTable::parse(
*OS << '\n';
Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
}
+ uint64_t LineTableSeqOffset = *OffsetPtr;
+
bool TombstonedAddress = false;
auto EmitRow = [&] {
if (!TombstonedAddress) {
@@ -857,7 +860,7 @@ Error DWARFDebugLine::LineTable::parse(
}
if (OS)
State.Row.dump(*OS);
- State.appendRowToMatrix();
+ State.appendRowToMatrix(LineTableSeqOffset);
}
};
while (*OffsetPtr < EndOffset) {
@@ -913,6 +916,9 @@ Error DWARFDebugLine::LineTable::parse(
// followed.
EmitRow();
State.resetRowAndSequence();
+ // Cursor now points to right after the end_sequence opcode - so points
+ // to the start of the next sequence - if one exists.
+ LineTableSeqOffset = Cursor.tell();
break;
case DW_LNE_set_address:
@@ -1364,10 +1370,18 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,
bool DWARFDebugLine::LineTable::lookupAddressRange(
object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result) const {
+ std::vector<uint32_t> &Result, const DWARFDie *Die) const {
+
+ // If DIE is provided, check for DW_AT_LLVM_stmt_sequence
+ std::optional<uint64_t> StmtSequenceOffset;
+ if (Die) {
+ if (auto StmtSeqAttr = Die->find(dwarf::DW_AT_LLVM_stmt_sequence)) {
+ StmtSequenceOffset = toSectionOffset(StmtSeqAttr);
+ }
+ }
// Search for relocatable addresses
- if (lookupAddressRangeImpl(Address, Size, Result))
+ if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
return true;
if (Address.SectionIndex == object::SectionedAddress::UndefSection)
@@ -1375,12 +1389,13 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
// Search for absolute addresses
Address.SectionIndex = object::SectionedAddress::UndefSection;
- return lookupAddressRangeImpl(Address, Size, Result);
+ return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset);
}
bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
object::SectionedAddress Address, uint64_t Size,
- std::vector<uint32_t> &Result) const {
+ std::vector<uint32_t> &Result,
+ std::optional<uint64_t> StmtSequenceOffset) const {
if (Sequences.empty())
return false;
uint64_t EndAddr = Address.Address + Size;
@@ -1401,6 +1416,14 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
const DWARFDebugLine::Sequence &CurSeq = *SeqPos;
+
+ // Skip sequences that don't match our stmt_sequence offset if one was
+ // provided
+ if (StmtSequenceOffset && CurSeq.Offset != *StmtSequenceOffset) {
+ ++SeqPos;
+ continue;
+ }
+
// For the first sequence, we need to find which row in the sequence is the
// first in our range.
uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
@@ -1423,7 +1446,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
++SeqPos;
}
- return true;
+ return !Result.empty();
}
std::optional<StringRef>
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index e549128031744e..7a1d634900ab6a 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -6,10 +6,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "DwarfGenerator.h"
#include "DwarfUtils.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
@@ -2035,4 +2035,132 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
EXPECT_THAT(Result.c_str(), MatchesRegex("a dir.b dir.b file"));
}
+/// Test that lookupAddressRange correctly filters rows based on DW_AT_LLVM_stmt_sequence.
+///
+/// This test verifies that:
+/// 1. When a DIE has a DW_AT_LLVM_stmt_sequence attribute, lookupAddressRange only returns
+/// rows from the sequence starting at the specified offset
+/// 2. When a DIE has an invalid DW_AT_LLVM_stmt_sequence offset, no rows are returned
+/// 3. When no DW_AT_LLVM_stmt_sequence is present, all matching rows are returned
+///
+/// The test creates a line table with two sequences at the same address range but
+/// different line numbers. It then creates three subprogram DIEs:
+/// - One with DW_AT_LLVM_stmt_sequence pointing to the first sequence
+/// - One with DW_AT_LLVM_stmt_sequence pointing to the second sequence
+/// - One with an invalid DW_AT_LLVM_stmt_sequence offset
+TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
+ if (!setupGenerator())
+ GTEST_SKIP();
+
+ // Create new DWARF with the subprogram DIE
+ dwarfgen::CompileUnit &CU = Gen->addCompileUnit();
+ dwarfgen::DIE CUDie = CU.getUnitDIE();
+
+ CUDie.addAttribute(DW_AT_name, DW_FORM_strp, "/tmp/main.c");
+ CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);
+
+ dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram);
+ SD1.addAttribute(DW_AT_name, DW_FORM_strp, "sub1");
+ SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+ SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+ // DW_AT_LLVM_stmt_sequence points to the first sequence
+ SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e);
+
+ dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram);
+ SD2.addAttribute(DW_AT_name, DW_FORM_strp, "sub2");
+ SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+ SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+ // DW_AT_LLVM_stmt_sequence points to the second sequence
+ SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42);
+
+ dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram);
+ SD3.addAttribute(DW_AT_name, DW_FORM_strp, "sub3");
+ SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
+ SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
+ // Invalid DW_AT_LLVM_stmt_sequence
+ SD3.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x66);
+
+ // Create a line table with multiple sequences
+ LineTable < = Gen->addLineTable();
+
+ // First sequence with addresses 0x1000(Ln100), 0x1004(Ln101)
+ LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
+ LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+ LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
+ LT.addStandardOpcode(DW_LNS_copy, {});
+ LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+ LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
+
+ // Second sequence with addresses 0x1000(Ln200), 0x1004(Ln201)
+ LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
+ LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
+ LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
+ LT.addStandardOpcode(DW_LNS_copy, {});
+ LT.addByte(0x4b); // Special opcode: address += 4, line += 1
+ LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});
+
+ // Generate the DWARF
+ generate();
+
+ // Parse the line table to get the sequence offset
+ auto ExpectedLineTable = Line.getOrParseLineTable(
+ LineData, /*Offset=*/0, *Context, nullptr, RecordRecoverable);
+ ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
+ const auto *Table = *ExpectedLineTable;
+
+ uint32_t NumCUs = Context->getNumCompileUnits();
+ ASSERT_EQ(NumCUs, 1u);
+ DWARFUnit *Unit = Context->getUnitAtIndex(0);
+ auto DwarfCUDie = Unit->getUnitDIE(false);
+
+ auto Sub1Die = DwarfCUDie.getFirstChild();
+ auto Sub2Die = Sub1Die.getSibling();
+ auto Sub3Die = Sub2Die.getSibling();
+
+ // Verify Sub1Die is the DIE generated from SD1
+ auto NameAttr1 = Sub1Die.find(DW_AT_name);
+ EXPECT_STREQ(*dwarf::toString(*NameAttr1), "sub1");
+
+ // Verify Sub2Die is the DIE generated from SD2
+ auto NameAttr2 = Sub2Die.find(DW_AT_name);
+ EXPECT_STREQ(*dwarf::toString(*NameAttr2), "sub2");
+
+ // Verify Sub2Die is the DIE generated from SD3
+ auto NameAttr3 = Sub3Die.find(DW_AT_name);
+ EXPECT_STREQ(*dwarf::toString(*NameAttr3), "sub3");
+
+ // Ensure there are two sequences
+ ASSERT_EQ(Table->Sequences.size(), 2u);
+
+ // Lookup addresses in the first sequence with the second sequence's filter
+ {
+ std::vector<uint32_t> Rows;
+ bool Found;
+
+ // Look up using Sub3Die, which has an invalid DW_AT_LLVM_stmt_sequence
+ Found = Table->lookupAddressRange(
+ {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+ &Sub3Die);
+ EXPECT_FALSE(Found);
+
+ // Look up using Sub1Die, which has a valid DW_AT_LLVM_stmt_sequence and
+ // should return row 0
+ Found = Table->lookupAddressRange(
+ {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+ &Sub1Die);
+ EXPECT_TRUE(Found);
+ ASSERT_EQ(Rows.size(), 1u);
+ EXPECT_EQ(Rows[0], 0);
+
+ // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and
+ // should return row 3
+ Rows.clear();
+ Found = Table->lookupAddressRange(
+ {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
+ &Sub2Die);
+ EXPECT_TRUE(Found);
+ ASSERT_EQ(Rows.size(), 1u);
+ EXPECT_EQ(Rows[0], 3);
+ }
+}
} // end anonymous namespace
More information about the llvm-commits
mailing list