[llvm] [LLVM][DWARF] Make some effort to avoid duplicates in .debug_ranges. (PR #106614)

Kyle Huey via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 4 04:50:40 PDT 2024


https://github.com/khuey updated https://github.com/llvm/llvm-project/pull/106614

>From bf9f828b040616d223139ac8dd9840aa3688e999 Mon Sep 17 00:00:00 2001
From: Kyle Huey <khuey at kylehuey.com>
Date: Thu, 29 Aug 2024 12:17:19 -0700
Subject: [PATCH 1/3] [LLVM][DWARF] Make some effort to avoid duplicates in
 .debug_ranges.

Inlining and zero-cost abstractions tend to produce volumes of debug info with
identical ranges. When built with full debugging information (the equivalent of
-g2) librustc_driver.so has 2.1 million entries in .debug_ranges. But only 1.1
million of those entries are unique. While in principle all duplicates could be
eliminated with a hashtable, simply checking whether the new range is identical
to the previous one, and skipping the addition if it is, is sufficient to
eliminate 99.99% of the duplicates. This reduces the size of
librustc_driver.so's .debug_ranges section by 35%, and the overall binary size
by a little more than 1%.
---
 llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 16 ++++++++++++++--
 llvm/lib/CodeGen/AsmPrinter/DwarfFile.h   |  4 ++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index eab798c0da7843..cd1279d2021328 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -121,7 +121,19 @@ void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) {
 
 std::pair<uint32_t, RangeSpanList *>
 DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) {
-  CURangeLists.push_back(
-      RangeSpanList{Asm->createTempSymbol("debug_ranges"), &CU, std::move(R)});
+  bool CanReuseLastRange = false;
+
+  if (!CURangeLists.empty()) {
+    auto Last = CURangeLists.back();
+    if (Last.CU == &CU && Last.Ranges == R) {
+      CanReuseLastRange = true;
+    }
+  }
+
+  if (!CanReuseLastRange) {
+    CURangeLists.push_back(RangeSpanList{Asm->createTempSymbol("debug_ranges"),
+                                         &CU, std::move(R)});
+  }
+
   return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back());
 }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index f76858fc2f36a0..89aadccaac7f9f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -37,6 +37,10 @@ class MDNode;
 struct RangeSpan {
   const MCSymbol *Begin;
   const MCSymbol *End;
+
+  bool operator==(const RangeSpan& Other) const {
+    return Begin == Other.Begin && End == Other.End;
+  }
 };
 
 struct RangeSpanList {

>From 7e5b3e8612516517e89e819a7aeef63902993f7f Mon Sep 17 00:00:00 2001
From: Kyle Huey <khuey at kylehuey.com>
Date: Thu, 29 Aug 2024 12:33:23 -0700
Subject: [PATCH 2/3] Please clang-format.

---
 llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 89aadccaac7f9f..0fc2b91ddfa913 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -38,7 +38,7 @@ struct RangeSpan {
   const MCSymbol *Begin;
   const MCSymbol *End;
 
-  bool operator==(const RangeSpan& Other) const {
+  bool operator==(const RangeSpan &Other) const {
     return Begin == Other.Begin && End == Other.End;
   }
 };

>From 50f45935de2b1bdcb70487804b3919e0e1d06f4d Mon Sep 17 00:00:00 2001
From: Kyle Huey <khuey at kylehuey.com>
Date: Tue, 3 Sep 2024 19:56:51 -0700
Subject: [PATCH 3/3] Add a test.

---
 .../Generic/debug-ranges-duplication.ll       | 70 +++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 llvm/test/DebugInfo/Generic/debug-ranges-duplication.ll

diff --git a/llvm/test/DebugInfo/Generic/debug-ranges-duplication.ll b/llvm/test/DebugInfo/Generic/debug-ranges-duplication.ll
new file mode 100644
index 00000000000000..540400be740a5d
--- /dev/null
+++ b/llvm/test/DebugInfo/Generic/debug-ranges-duplication.ll
@@ -0,0 +1,70 @@
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck %s
+;
+; Generated from the following C source with:
+; clang -S -emit-llvm -g -O2 test.c
+;
+; /* BEGIN SOURCE */
+; void f1();
+; inline void f2() {
+;   f1();
+;   f1();
+; }
+; inline void f3() {
+;   f2();
+; }
+; void f4() {
+;   f3();
+;   f1();
+; }
+; /* END SOURCE */
+;
+; Minor complication: after generating the LLVM IR, it was manually edited so
+; that the 'f1()' call from f4 was reordered to appear between the two inlined
+; f1 calls from f2. This causes f2's inlined_subroutine to use DW_AT_ranges.
+
+; Check that identical debug ranges in succession reuse the same entry in
+; .debug_ranges rather than emitting duplicate entries.
+
+; CHECK:      DW_TAG_inlined_subroutine
+; CHECK:      DW_AT_ranges
+; CHECK-SAME: rangelist = 0x[[#%.8X,RANGE:]]
+; CHECK:      DW_TAG_inlined_subroutine
+; CHECK:      DW_AT_ranges
+; CHECK-SAME: rangelist = 0x[[#RANGE]]
+
+; Function Attrs: nounwind uwtable
+define dso_local void @f4() local_unnamed_addr !dbg !9 {
+entry:
+  tail call void (...) @f1(), !dbg !12
+  tail call void (...) @f1(), !dbg !18
+  tail call void (...) @f1(), !dbg !17
+  ret void, !dbg !19
+}
+
+declare !dbg !20 void @f1(...) local_unnamed_addr
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 9edd998e10fabfff067b9e6e5b044f85a24d0dd5)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/home/khuey/dev/llvm-project", checksumkind: CSK_MD5, checksum: "4510feb241cf078af753e3dc13205127")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 9edd998e10fabfff067b9e6e5b044f85a24d0dd5)"}
+!9 = distinct !DISubprogram(name: "f4", scope: !1, file: !1, line: 9, type: !10, scopeLine: 9, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!10 = !DISubroutineType(types: !11)
+!11 = !{null}
+!12 = !DILocation(line: 3, column: 3, scope: !13, inlinedAt: !14)
+!13 = distinct !DISubprogram(name: "f2", scope: !1, file: !1, line: 2, type: !10, scopeLine: 2, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!14 = distinct !DILocation(line: 7, column: 3, scope: !15, inlinedAt: !16)
+!15 = distinct !DISubprogram(name: "f3", scope: !1, file: !1, line: 6, type: !10, scopeLine: 6, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!16 = distinct !DILocation(line: 10, column: 3, scope: !9)
+!17 = !DILocation(line: 4, column: 3, scope: !13, inlinedAt: !14)
+!18 = !DILocation(line: 11, column: 3, scope: !9)
+!19 = !DILocation(line: 12, column: 1, scope: !9)
+!20 = !DISubprogram(name: "f1", scope: !1, file: !1, line: 1, type: !10, spFlags: DISPFlagOptimized)



More information about the llvm-commits mailing list