[llvm] [DebugInfo] Add clang flag to enable function-level debug line attribution (PR #93985)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 3 07:18:33 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-binary-utilities

Author: None (alx32)

<details>
<summary>Changes</summary>

This patch introduces a new LLVM option, `-emit-func-debug-line-table-offsets` (passed from Clang as `-mllvm -emit-func-debug-line-table-offsets`), which enables the generation of additional debug information to facilitate the attribution of debug line data to the corresponding functions (`DW_TAG_subprogram`).

**Context:**
Currently, Clang generates function line information in the `debug_line` section without directly linking the line entries to their originating functions. This becomes problematic when post-compile tools, such as Identical Code Folding (ICF) in the LLD linker, merge multiple functions together, resulting in overlapping address ranges and conflicting line information.

To address this issue, this patch adds a new attribute, `DW_AT_META_stmt_sequence`, to each `DW_TAG_subprogram`, which represents an offset into the line table where the line data for the subprogram begins. Additionally, a `DW_LNE_end_sequence` is added to the line table to mark the end of the line information for a particular subprogram. These changes enable the correct attribution of line entries to their originating functions, even when functions share the same address space after optimization.

**NOTE:** While this change is functional, it requires additional tooling support to be fully utilized, particularly for symbol resolution in cases where multiple functions share the same address space. This will be addressed in future changes.

**Previous changes:** [#<!-- -->93137](https://github.com/llvm/llvm-project/pull/93137) - Added an LLD option to generate debug information for ICF'ed functions.


---
Full diff: https://github.com/llvm/llvm-project/pull/93985.diff


8 Files Affected:

- (modified) llvm/include/llvm/BinaryFormat/Dwarf.def (+2) 
- (modified) llvm/include/llvm/BinaryFormat/Dwarf.h (+1) 
- (modified) llvm/include/llvm/MC/MCDwarf.h (+18-3) 
- (modified) llvm/include/llvm/MC/MCStreamer.h (+27) 
- (modified) llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp (+8) 
- (modified) llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (+29-1) 
- (modified) llvm/lib/MC/MCDwarf.cpp (+23-5) 
- (added) llvm/test/DebugInfo/X86/DW_AT_META_stmt_seq_sec_offset.ll (+68) 


``````````diff
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index adcf24eb83b03..6124babc40927 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -578,6 +578,8 @@ HANDLE_DW_AT(0x2904, GO_runtime_type, 0, GO)
 
 HANDLE_DW_AT(0x3210, UPC_threads_scaled, 0, UPC)
 
+HANDLE_DW_AT(0x3600, META_stmt_sequence, 0, META)
+
 HANDLE_DW_AT(0x393e, IBM_wsa_addr, 0, IBM)
 HANDLE_DW_AT(0x393f, IBM_home_location, 0, IBM)
 HANDLE_DW_AT(0x3940, IBM_alt_srcview, 0, IBM)
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index 74c4d6ff3a716..3e42dc240fcc0 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -84,6 +84,7 @@ enum LLVMConstants : uint32_t {
   DWARF_VENDOR_PGI,
   DWARF_VENDOR_SUN,
   DWARF_VENDOR_UPC,
+  DWARF_VENDOR_META,
   ///\}
 };
 
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index 18056c5fdf816..b6f87dc8b6c8a 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -122,6 +122,8 @@ class MCDwarfLoc {
 private: // MCContext manages these
   friend class MCContext;
   friend class MCDwarfLineEntry;
+  // DwarfDebug::endFunctionImpl needs to construct MCDwarfLoc(IsEndOfFunction)
+  friend class DwarfDebug;
 
   MCDwarfLoc(unsigned fileNum, unsigned line, unsigned column, unsigned flags,
              unsigned isa, unsigned discriminator)
@@ -194,14 +196,27 @@ class MCDwarfLineEntry : public MCDwarfLoc {
 
 public:
   // Constructor to create an MCDwarfLineEntry given a symbol and the dwarf loc.
-  MCDwarfLineEntry(MCSymbol *label, const MCDwarfLoc loc)
-      : MCDwarfLoc(loc), Label(label) {}
+  MCDwarfLineEntry(MCSymbol *label, const MCDwarfLoc loc,
+                   bool isEndOfFunction = false,
+                   MCSymbol *streamLabel = nullptr)
+      : MCDwarfLoc(loc), Label(label), IsEndOfFunction(isEndOfFunction),
+        StreamLabel(streamLabel) {}
 
   MCSymbol *getLabel() const { return Label; }
 
   // This indicates the line entry is synthesized for an end entry.
   bool IsEndEntry = false;
 
+  // This indicates that the current line entry denotes the end of a function,
+  // it is used to emit a DW_LNE_end_sequence to reset the state machine
+  // registers.
+  bool IsEndOfFunction;
+
+  // Optional symbol to be emitted just before the line is written into the
+  // output stream. It can be used to reference the position of the start of
+  // this line's data in the output stream.
+  MCSymbol *StreamLabel;
+
   // Override the label with the given EndLabel.
   void setEndLabel(MCSymbol *EndLabel) {
     Label = EndLabel;
@@ -227,7 +242,7 @@ class MCLineSection {
 
   // Add an end entry by cloning the last entry, if exists, for the section
   // the given EndLabel belongs to. The label is replaced by the given EndLabel.
-  void addEndEntry(MCSymbol *EndLabel);
+  void addEndEntry(MCSymbol *EndLabel, bool generatingFuncLineTableOffsets);
 
   using MCDwarfLineEntryCollection = std::vector<MCDwarfLineEntry>;
   using iterator = MCDwarfLineEntryCollection::iterator;
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index b7468cf70a664..559acabee4082 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -254,6 +254,15 @@ class MCStreamer {
   /// discussion for future inclusion.
   bool AllowAutoPadding = false;
 
+  // Flag specifying whether functions will have an offset into the line table
+  // where the line data for that function starts
+  bool GenerateFuncLineTableOffsets = false;
+
+  // Symbol that tracks the stream symbol for first line of the current function
+  // being generated. This symbol can be used to reference where the line
+  // entries for the function start in the generated line table.
+  MCSymbol *CurrentFuncFirstLineStreamSym;
+
 protected:
   MCStreamer(MCContext &Ctx);
 
@@ -310,6 +319,24 @@ class MCStreamer {
   void setAllowAutoPadding(bool v) { AllowAutoPadding = v; }
   bool getAllowAutoPadding() const { return AllowAutoPadding; }
 
+  void setGenerateFuncLineTableOffsets(bool v) {
+    GenerateFuncLineTableOffsets = v;
+  }
+  bool getGenerateFuncLineTableOffsets() const {
+    return GenerateFuncLineTableOffsets;
+  }
+
+  // Use the below functions to track the symbol that points to the current
+  // function's line info in the output stream.
+  void beginFunction() { CurrentFuncFirstLineStreamSym = nullptr; }
+  void emittedLineStreamSym(MCSymbol *StreamSym) {
+    if (!CurrentFuncFirstLineStreamSym)
+      CurrentFuncFirstLineStreamSym = StreamSym;
+  }
+  MCSymbol *getCurrentFuncFirstLineStreamSym() {
+    return CurrentFuncFirstLineStreamSym;
+  }
+
   /// When emitting an object file, create and emit a real label. When emitting
   /// textual assembly, this should do nothing to avoid polluting our output.
   virtual MCSymbol *emitCFILabel();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index c1e7f01f0eba5..53a9805cfd503 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -526,6 +526,14 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
           *DD->getCurrentFunction()))
     addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
 
+  if (Asm->OutStreamer->getGenerateFuncLineTableOffsets() &&
+      Asm->OutStreamer->getCurrentFuncFirstLineStreamSym()) {
+    addSectionLabel(
+        *SPDie, dwarf::DW_AT_META_stmt_sequence,
+        Asm->OutStreamer->getCurrentFuncFirstLineStreamSym(),
+        Asm->getObjFileLowering().getDwarfLineSection()->getBeginSymbol());
+  }
+
   // Only include DW_AT_frame_base in full debug info
   if (!includeMinimalInlineScopes()) {
     const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index b9c02aed848cc..6b987792c3db3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -170,6 +170,12 @@ static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
                           "Stuff")),
     cl::init(DwarfDebug::MinimizeAddrInV5::Default));
 
+static cl::opt<bool> EmitFuncLineTableOffsetsOption(
+    "emit-func-debug-line-table-offsets", cl::Hidden,
+    cl::desc("Include line table offset in function's debug info and emit end "
+             "sequence after each function's line data."),
+    cl::init(false));
+
 static constexpr unsigned ULEB128PadSize = 4;
 
 void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
@@ -440,6 +446,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
   Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
   Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
                                                         : dwarf::DWARF32);
+  Asm->OutStreamer->setGenerateFuncLineTableOffsets(
+      EmitFuncLineTableOffsetsOption);
 }
 
 // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
@@ -2222,6 +2230,10 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
     return;
 
+  // Notify the streamer that we are beginning a function - this will reset the
+  // label pointing to the currently generated function's first line entry
+  Asm->OutStreamer->beginFunction();
+
   DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
 
   Asm->OutStreamer->getContext().setDwarfCompileUnitID(
@@ -2250,7 +2262,8 @@ void DwarfDebug::terminateLineTable(const DwarfCompileUnit *CU) {
       getDwarfCompileUnitIDForLineTable(*CU));
   // Add the last range label for the given CU.
   LineTable.getMCLineSections().addEndEntry(
-      const_cast<MCSymbol *>(CURanges.back().End));
+      const_cast<MCSymbol *>(CURanges.back().End),
+      EmitFuncLineTableOffsetsOption);
 }
 
 void DwarfDebug::skippedNonDebugFunction() {
@@ -2343,6 +2356,21 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
   // Construct call site entries.
   constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF);
 
+  // If we're emitting line table offsets, we also need to emit an end label
+  // after all of the function's line entries
+  if (EmitFuncLineTableOffsetsOption) {
+    MCSymbol *LineSym = Asm->OutStreamer->getContext().createTempSymbol();
+    Asm->OutStreamer->emitLabel(LineSym);
+    MCDwarfLoc DwarfLoc(
+        1, 1, 0, DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0);
+    MCDwarfLineEntry LineEntry(LineSym, DwarfLoc, /*IsEndOfFunction*/ true);
+    Asm->OutStreamer->getContext()
+        .getMCDwarfLineTable(
+            Asm->OutStreamer->getContext().getDwarfCompileUnitID())
+        .getMCLineSections()
+        .addLineEntry(LineEntry, Asm->OutStreamer->getCurrentSectionOnly());
+  }
+
   // Clear debug info
   // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
   // DbgVariables except those that are also in AbstractVariables (since they
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index aba4071e6b910..aeec79766e3d6 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -103,8 +103,18 @@ void MCDwarfLineEntry::make(MCStreamer *MCOS, MCSection *Section) {
   // Get the current .loc info saved in the context.
   const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc();
 
+  MCSymbol *StreamLabel = nullptr;
+  // If functions need offsets into the generated line table, then we need to
+  // create a label referencing where the line was generated in the output
+  // stream
+  if (MCOS->getGenerateFuncLineTableOffsets()) {
+    StreamLabel = MCOS->getContext().createTempSymbol();
+    MCOS->emittedLineStreamSym(StreamLabel);
+  }
+
   // Create a (local) line entry with the symbol and the current .loc info.
-  MCDwarfLineEntry LineEntry(LineSym, DwarfLoc);
+  MCDwarfLineEntry LineEntry(LineSym, DwarfLoc, /*isEndOfFunction=*/false,
+                             StreamLabel);
 
   // clear DwarfLocSeen saying the current .loc info is now used.
   MCOS->getContext().clearDwarfLocSeen();
@@ -144,7 +154,8 @@ makeStartPlusIntExpr(MCContext &Ctx, const MCSymbol &Start, int IntVal) {
   return Res;
 }
 
-void MCLineSection::addEndEntry(MCSymbol *EndLabel) {
+void MCLineSection::addEndEntry(MCSymbol *EndLabel,
+                                bool generatingFuncLineTableOffsets) {
   auto *Sec = &EndLabel->getSection();
   // The line table may be empty, which we should skip adding an end entry.
   // There are two cases:
@@ -157,8 +168,12 @@ void MCLineSection::addEndEntry(MCSymbol *EndLabel) {
   if (I != MCLineDivisions.end()) {
     auto &Entries = I->second;
     auto EndEntry = Entries.back();
-    EndEntry.setEndLabel(EndLabel);
-    Entries.push_back(EndEntry);
+    // If generatingFuncLineTableOffsets is set, then we already generated an
+    // end label at the end of the last function, so skip generating another one
+    if (!generatingFuncLineTableOffsets) {
+      EndEntry.setEndLabel(EndLabel);
+      Entries.push_back(EndEntry);
+    }
   }
 }
 
@@ -187,8 +202,11 @@ void MCDwarfLineTable::emitOne(
   bool EndEntryEmitted = false;
   for (const MCDwarfLineEntry &LineEntry : LineEntries) {
     MCSymbol *Label = LineEntry.getLabel();
+    if (LineEntry.StreamLabel && MCOS->getGenerateFuncLineTableOffsets()) {
+      MCOS->emitLabel(LineEntry.StreamLabel);
+    }
     const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
-    if (LineEntry.IsEndEntry) {
+    if (LineEntry.IsEndEntry || LineEntry.IsEndOfFunction) {
       MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, Label,
                                      asmInfo->getCodePointerSize());
       init();
diff --git a/llvm/test/DebugInfo/X86/DW_AT_META_stmt_seq_sec_offset.ll b/llvm/test/DebugInfo/X86/DW_AT_META_stmt_seq_sec_offset.ll
new file mode 100644
index 0000000000000..50e0856b30327
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/DW_AT_META_stmt_seq_sec_offset.ll
@@ -0,0 +1,68 @@
+; RUN: llc -mtriple=i686-w64-mingw32 -o %t -filetype=obj %s
+; RUN: llvm-dwarfdump -v -all %t | FileCheck %s -check-prefix=NO_STMT_SEQ
+
+; RUN: llc -mtriple=i686-w64-mingw32 -o %t -filetype=obj %s -emit-func-debug-line-table-offsets
+; RUN: llvm-dwarfdump -v -all %t | FileCheck %s -check-prefix=STMT_SEQ
+
+; NO_STMT_SEQ-NOT:      DW_AT_META_stmt_sequence
+
+; STMT_SEQ:   [2] DW_TAG_subprogram
+; STMT_SEQ:  	       DW_AT_META_stmt_sequence    DW_FORM_sec_offset
+; STMT_SEQ:   DW_TAG_subprogram [2]
+; STMT_SEQ:       DW_AT_META_stmt_sequence [DW_FORM_sec_offset]	(0x00000028)
+; STMT_SEQ:   DW_AT_name {{.*}}func01
+; STMT_SEQ:   DW_TAG_subprogram [2]
+; STMT_SEQ:       DW_AT_META_stmt_sequence [DW_FORM_sec_offset]	(0x00000038)
+; STMT_SEQ:   DW_AT_name {{.*}}main
+
+; generated from:
+; clang -g -S -emit-llvm test.c -o test.ll
+; ======= test.c ======
+; int func01() {
+;   return 1;
+; }
+; int main() {
+;   return 0;
+; }
+; =====================
+
+
+; ModuleID = 'test.c'
+source_filename = "test.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @func01() #0 !dbg !9 {
+  ret i32 1, !dbg !13
+}
+
+; Function Attrs: noinline nounwind optnone ssp uwtable(sync)
+define i32 @main() #0 !dbg !14 {
+  %1 = alloca i32, align 4
+  store i32 0, ptr %1, align 4
+  ret i32 0, !dbg !15
+}
+
+attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "Homebrew clang version 17.0.6", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk", sdk: "MacOSX14.sdk")
+!1 = !DIFile(filename: "test.c", directory: "/tmp/clang_test")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"uwtable", i32 1}
+!7 = !{i32 7, !"frame-pointer", i32 1}
+!8 = !{!"Homebrew clang version 17.0.6"}
+!9 = distinct !DISubprogram(name: "func01", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !DILocation(line: 2, column: 3, scope: !9)
+!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !10, scopeLine: 5, spFlags: DISPFlagDefinition, unit: !0)
+!15 = !DILocation(line: 6, column: 3, scope: !14)

``````````

</details>


https://github.com/llvm/llvm-project/pull/93985


More information about the llvm-commits mailing list