[llvm] [DebugInfo] Add clang flag to enable function-level debug line attribution (PR #93985)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 31 09:46:46 PDT 2024


https://github.com/alx32 created https://github.com/llvm/llvm-project/pull/93985

This patch introduces a new flag to Clang (`-mllvm -emit-func-debug-line-table-offsets`), which enables easy attribution of debug line data to their respective functions.

**Context:**
Currently, Clang generates function line information (found in the `debug_line` section) without directly linking line entries to their originating functions (`DW_TAG_subprogram` entries). Normally, line entries are associated with functions based on the executable code addresses they describe - i.e., by scanning the line table for entries that fall within the address range specified by a `DW_TAG_subprogram`'s `DW_AT_low_pc` and `DW_AT_high_pc` (or its `DW_AT_ranges`).

The problem arises when post-compile tools, such as Identical Code Folding (ICF) in the LLD linker, merge multiple functions. This results in multiple `DW_TAG_subprogram` entries describing the same address range, leading to conflicting line information for the same address. In this scenario, it becomes impossible to distinguish which line entries belong to which `DW_TAG_subprogram`.

To resolve this, we add a new attribute, `DW_AT_META_stmt_sequence`, to each `DW_TAG_subprogram`. This attribute holds the offset in the line table where the line data for the subprogram begins. Additionally, a `DW_LNE_end_sequence` entry is added to the line table to mark the end of the line information for each subprogram. With these changes, we can correctly attribute all line entries to their originating functions, even when functions have overlapping address ranges.

**NOTE:** While this change is functional, it is not yet fully useful. Further tooling support will be added to handle symbol resolution for functions sharing the same address range.

**Previous changes:** [https://github.com/llvm/llvm-project/pull/93137](https://github.com/llvm/llvm-project/pull/93137) – LLD option enabling the generation of debug info for ICF'd functions.

>From cf53e62776523e6334d65d09db3b0904e6dd853d Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 31 May 2024 09:10:09 -0700
Subject: [PATCH] [DebugInfo] Add subprogram attr pointing to its line entries

---
 llvm/include/llvm/BinaryFormat/Dwarf.def      |  2 ++
 llvm/include/llvm/BinaryFormat/Dwarf.h        |  1 +
 llvm/include/llvm/MC/MCDwarf.h                | 21 +++++++++++--
 llvm/include/llvm/MC/MCStreamer.h             | 27 +++++++++++++++++
 .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp   |  8 +++++
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp    | 30 ++++++++++++++++++-
 llvm/lib/MC/MCDwarf.cpp                       | 28 +++++++++++++----
 7 files changed, 108 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index adcf24eb83b03..6124babc40927 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -578,6 +578,8 @@ HANDLE_DW_AT(0x2904, GO_runtime_type, 0, GO)
 
 HANDLE_DW_AT(0x3210, UPC_threads_scaled, 0, UPC)
 
+HANDLE_DW_AT(0x3600, META_stmt_sequence, 0, META)
+
 HANDLE_DW_AT(0x393e, IBM_wsa_addr, 0, IBM)
 HANDLE_DW_AT(0x393f, IBM_home_location, 0, IBM)
 HANDLE_DW_AT(0x3940, IBM_alt_srcview, 0, IBM)
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index 74c4d6ff3a716..3e42dc240fcc0 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -84,6 +84,7 @@ enum LLVMConstants : uint32_t {
   DWARF_VENDOR_PGI,
   DWARF_VENDOR_SUN,
   DWARF_VENDOR_UPC,
+  DWARF_VENDOR_META,
   ///\}
 };
 
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index 18056c5fdf816..b6f87dc8b6c8a 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -122,6 +122,8 @@ class MCDwarfLoc {
 private: // MCContext manages these
   friend class MCContext;
   friend class MCDwarfLineEntry;
+  // DwarfDebug::endFunctionImpl needs to construct MCDwarfLoc(IsEndOfFunction)
+  friend class DwarfDebug;
 
   MCDwarfLoc(unsigned fileNum, unsigned line, unsigned column, unsigned flags,
              unsigned isa, unsigned discriminator)
@@ -194,14 +196,27 @@ class MCDwarfLineEntry : public MCDwarfLoc {
 
 public:
   // Constructor to create an MCDwarfLineEntry given a symbol and the dwarf loc.
-  MCDwarfLineEntry(MCSymbol *label, const MCDwarfLoc loc)
-      : MCDwarfLoc(loc), Label(label) {}
+  MCDwarfLineEntry(MCSymbol *label, const MCDwarfLoc loc,
+                   bool isEndOfFunction = false,
+                   MCSymbol *streamLabel = nullptr)
+      : MCDwarfLoc(loc), Label(label), IsEndOfFunction(isEndOfFunction),
+        StreamLabel(streamLabel) {}
 
   MCSymbol *getLabel() const { return Label; }
 
   // This indicates the line entry is synthesized for an end entry.
   bool IsEndEntry = false;
 
+  // This indicates that the current line entry denotes the end of a function.
+  // It is used to emit a DW_LNE_end_sequence to reset the state machine
+  // registers.
+  bool IsEndOfFunction;
+
+  // Optional symbol to be emitted just before the line is written into the
+  // output stream. It can be used to reference the position of the start of
+  // this line's data in the output stream.
+  MCSymbol *StreamLabel;
+
   // Override the label with the given EndLabel.
   void setEndLabel(MCSymbol *EndLabel) {
     Label = EndLabel;
@@ -227,7 +242,7 @@ class MCLineSection {
 
   // Add an end entry by cloning the last entry, if exists, for the section
   // the given EndLabel belongs to. The label is replaced by the given EndLabel.
-  void addEndEntry(MCSymbol *EndLabel);
+  void addEndEntry(MCSymbol *EndLabel, bool generatingFuncLineTableOffsets);
 
   using MCDwarfLineEntryCollection = std::vector<MCDwarfLineEntry>;
   using iterator = MCDwarfLineEntryCollection::iterator;
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index b7468cf70a664..559acabee4082 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -254,6 +254,15 @@ class MCStreamer {
   /// discussion for future inclusion.
   bool AllowAutoPadding = false;
 
+  // Flag specifying whether functions will have an offset into the line table
+  // where the line data for that function starts.
+  bool GenerateFuncLineTableOffsets = false;
+
+  // Stream symbol of the first line entry of the function currently being
+  // generated, i.e. where its entries start in the emitted line table. Starts
+  // null so it can be read safely before the first beginFunction() call.
+  MCSymbol *CurrentFuncFirstLineStreamSym = nullptr;
+
 protected:
   MCStreamer(MCContext &Ctx);
 
@@ -310,6 +319,24 @@ class MCStreamer {
   void setAllowAutoPadding(bool v) { AllowAutoPadding = v; }
   bool getAllowAutoPadding() const { return AllowAutoPadding; }
 
+  void setGenerateFuncLineTableOffsets(bool v) {
+    GenerateFuncLineTableOffsets = v;
+  }
+  bool getGenerateFuncLineTableOffsets() const {
+    return GenerateFuncLineTableOffsets;
+  }
+
+  // Track the symbol marking the start of the current function's line info:
+  // beginFunction() clears it and only the first symbol emitted after is kept.
+  void beginFunction() { CurrentFuncFirstLineStreamSym = nullptr; }
+  void emittedLineStreamSym(MCSymbol *StreamSym) {
+    if (!CurrentFuncFirstLineStreamSym)
+      CurrentFuncFirstLineStreamSym = StreamSym;
+  }
+  MCSymbol *getCurrentFuncFirstLineStreamSym() {
+    return CurrentFuncFirstLineStreamSym;
+  }
+
   /// When emitting an object file, create and emit a real label. When emitting
   /// textual assembly, this should do nothing to avoid polluting our output.
   virtual MCSymbol *emitCFILabel();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index c1e7f01f0eba5..53a9805cfd503 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -526,6 +526,14 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
           *DD->getCurrentFunction()))
     addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
 
+  if (Asm->OutStreamer->getGenerateFuncLineTableOffsets() &&
+      Asm->OutStreamer->getCurrentFuncFirstLineStreamSym()) {
+    addSectionLabel(
+        *SPDie, dwarf::DW_AT_META_stmt_sequence,
+        Asm->OutStreamer->getCurrentFuncFirstLineStreamSym(),
+        Asm->getObjFileLowering().getDwarfLineSection()->getBeginSymbol());
+  }
+
   // Only include DW_AT_frame_base in full debug info
   if (!includeMinimalInlineScopes()) {
     const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index b9c02aed848cc..6b987792c3db3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -170,6 +170,12 @@ static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
                           "Stuff")),
     cl::init(DwarfDebug::MinimizeAddrInV5::Default));
 
+static cl::opt<bool> EmitFuncLineTableOffsetsOption(
+    "emit-func-debug-line-table-offsets", cl::Hidden,
+    cl::desc("Include line table offset in function's debug info and emit end "
+             "sequence after each function's line data."),
+    cl::init(false));
+
 static constexpr unsigned ULEB128PadSize = 4;
 
 void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
@@ -440,6 +446,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
   Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
   Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
                                                         : dwarf::DWARF32);
+  Asm->OutStreamer->setGenerateFuncLineTableOffsets(
+      EmitFuncLineTableOffsetsOption);
 }
 
 // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
@@ -2222,6 +2230,10 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
     return;
 
+  // Notify the streamer that we are beginning a function - this will reset the
+  // label pointing to the currently generated function's first line entry
+  Asm->OutStreamer->beginFunction();
+
   DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
 
   Asm->OutStreamer->getContext().setDwarfCompileUnitID(
@@ -2250,7 +2262,8 @@ void DwarfDebug::terminateLineTable(const DwarfCompileUnit *CU) {
       getDwarfCompileUnitIDForLineTable(*CU));
   // Add the last range label for the given CU.
   LineTable.getMCLineSections().addEndEntry(
-      const_cast<MCSymbol *>(CURanges.back().End));
+      const_cast<MCSymbol *>(CURanges.back().End),
+      EmitFuncLineTableOffsetsOption);
 }
 
 void DwarfDebug::skippedNonDebugFunction() {
@@ -2343,6 +2356,21 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
   // Construct call site entries.
   constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF);
 
+  // If we're emitting line table offsets, we also need to emit an end label
+  // after all of the function's line entries.
+  if (EmitFuncLineTableOffsetsOption) {
+    MCSymbol *LineSym = Asm->OutStreamer->getContext().createTempSymbol();
+    Asm->OutStreamer->emitLabel(LineSym);
+    MCDwarfLoc DwarfLoc(
+        1, 1, 0, DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0);
+    MCDwarfLineEntry LineEntry(LineSym, DwarfLoc, /*IsEndOfFunction*/ true);
+    Asm->OutStreamer->getContext()
+        .getMCDwarfLineTable(
+            Asm->OutStreamer->getContext().getDwarfCompileUnitID())
+        .getMCLineSections()
+        .addLineEntry(LineEntry, Asm->OutStreamer->getCurrentSectionOnly());
+  }
+
   // Clear debug info
   // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
   // DbgVariables except those that are also in AbstractVariables (since they
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index aba4071e6b910..aeec79766e3d6 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -103,8 +103,18 @@ void MCDwarfLineEntry::make(MCStreamer *MCOS, MCSection *Section) {
   // Get the current .loc info saved in the context.
   const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc();
 
+  MCSymbol *StreamLabel = nullptr;
+  // If functions need offsets into the generated line table, then we need to
+  // create a label referencing where the line was generated in the output
+  // stream
+  if (MCOS->getGenerateFuncLineTableOffsets()) {
+    StreamLabel = MCOS->getContext().createTempSymbol();
+    MCOS->emittedLineStreamSym(StreamLabel);
+  }
+
   // Create a (local) line entry with the symbol and the current .loc info.
-  MCDwarfLineEntry LineEntry(LineSym, DwarfLoc);
+  MCDwarfLineEntry LineEntry(LineSym, DwarfLoc, /*isEndOfFunction=*/false,
+                             StreamLabel);
 
   // clear DwarfLocSeen saying the current .loc info is now used.
   MCOS->getContext().clearDwarfLocSeen();
@@ -144,7 +154,8 @@ makeStartPlusIntExpr(MCContext &Ctx, const MCSymbol &Start, int IntVal) {
   return Res;
 }
 
-void MCLineSection::addEndEntry(MCSymbol *EndLabel) {
+void MCLineSection::addEndEntry(MCSymbol *EndLabel,
+                                bool generatingFuncLineTableOffsets) {
   auto *Sec = &EndLabel->getSection();
   // The line table may be empty, which we should skip adding an end entry.
   // There are two cases:
@@ -157,8 +168,12 @@ void MCLineSection::addEndEntry(MCSymbol *EndLabel) {
   if (I != MCLineDivisions.end()) {
     auto &Entries = I->second;
     auto EndEntry = Entries.back();
-    EndEntry.setEndLabel(EndLabel);
-    Entries.push_back(EndEntry);
+    // If generatingFuncLineTableOffsets is set, then we already generated an
+    // end label at the end of the last function, so skip generating another one
+    if (!generatingFuncLineTableOffsets) {
+      EndEntry.setEndLabel(EndLabel);
+      Entries.push_back(EndEntry);
+    }
   }
 }
 
@@ -187,8 +202,11 @@ void MCDwarfLineTable::emitOne(
   bool EndEntryEmitted = false;
   for (const MCDwarfLineEntry &LineEntry : LineEntries) {
     MCSymbol *Label = LineEntry.getLabel();
+    if (LineEntry.StreamLabel && MCOS->getGenerateFuncLineTableOffsets()) {
+      MCOS->emitLabel(LineEntry.StreamLabel);
+    }
     const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
-    if (LineEntry.IsEndEntry) {
+    if (LineEntry.IsEndEntry || LineEntry.IsEndOfFunction) {
       MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, Label,
                                      asmInfo->getCodePointerSize());
       init();



More information about the llvm-commits mailing list