[llvm] Modify dwarfdump verification to allow sub-category counts (PR #125062)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 30 06:23:23 PST 2025


https://github.com/youngdfb created https://github.com/llvm/llvm-project/pull/125062

It was discovered that BOLT had several distinct issues that left debug names entries missing for DIEs of various tags (119493 & 119023 as examples), but verifying the DWARF with llvm-dwarfdump prior to those fixes reported only a single 'missing name' category:
{"error-categories":{"Name Index DIE entry missing name":{"count":36355210}},"error-count":36355210}
To make DWARF verification easier to leverage for tracking debug health, this change extends the JSON output with detailed counts per sub-category where that makes sense.
For now, this is only implemented for the 'missing name' errors (sub-categorized by DIE tag), but it can be extended to other categories:
{"error-categories":{"Name Index DIE entry missing name":{"count":10,"details":{"DW_TAG_inlined_subroutine":1,"DW_TAG_label":1,"DW_TAG_namespace":2,"DW_TAG_subprogram":2,"DW_TAG_variable":4}}},"error-count":10}

This diff also modifies the tests created in pull request 124936 (not yet landed) so that they check the new JSON shape.  Ideally this lands after that one, but the pull requests were not correctly created as a stack.

>From 5d326487c7a2371e3e1ceca2072106e9c03ec26f Mon Sep 17 00:00:00 2001
From: David Young <davidayoung at meta.com>
Date: Wed, 29 Jan 2025 06:32:33 -0800
Subject: [PATCH 1/3] Add test for dwarf verification JSON output

Summary:
    LLVM commit 6244dfef5cd45f1395c66abbe061c6a7eb002676 added the ability
for llvm-dwarfdump to specify --verify-json and get a JSON aggregation of
the DWARF errors.  This diff improves the testing by ensuring we validate
the expected JSON shape.
    A follow up diff will modify the JSON and this ensures we can verify
that change.

    Test Plan:
       ninja check-llvm-tools-llvm-dwarfdump
---
 ...g-names-verify--completeness-json-output.s | 172 ++++++++++++++++++
 .../X86/debug-names-verify-completeness.s     |   1 -
 2 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify--completeness-json-output.s

diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify--completeness-json-output.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify--completeness-json-output.s
new file mode 100644
index 00000000000000..c4b7ffe1d2d8e7
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify--completeness-json-output.s
@@ -0,0 +1,172 @@
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj -o - | not llvm-dwarfdump -verify --verify-json=%t.json -
+# RUN: FileCheck %s --input-file %t.json
+
+# CHECK: {"error-categories":{"Name Index DIE entry missing name":{"count":10}},"error-count":10}
+# CHECK-NOT: error: Name Index @ 0x0: Entry for DIE @ {{.*}} (DW_TAG_variable) with name var_block_addr missing.
+
+        .section        .debug_loc,"", at progbits
+.Ldebug_loc0:
+        .quad   0
+        .quad   1
+        .short  .Lloc0_end-.Lloc0_start # Loc expr size
+.Lloc0_start:
+        .byte   3                       # DW_OP_addr
+        .quad 0x47
+.Lloc0_end:
+        .quad   0
+        .quad   0
+
+        .section        .debug_abbrev,"", at progbits
+        .byte   1                       # Abbreviation Code
+        .byte   17                      # DW_TAG_compile_unit
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   37                      # DW_AT_producer
+        .byte   8                       # DW_FORM_string
+        .byte   17                      # DW_AT_low_pc
+        .byte   1                       # DW_FORM_addr
+        .byte   18                      # DW_AT_high_pc
+        .byte   6                       # DW_FORM_data4
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+
+        .byte   2                       # Abbreviation Code
+        .byte   52                      # DW_TAG_variable
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   8                       # DW_FORM_string
+        .byte   2                       # DW_AT_location
+        .byte   24                      # DW_FORM_exprloc
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+
+        .byte   3                       # Abbreviation Code
+        .byte   46                      # DW_TAG_subprogram
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   3                       # DW_AT_name
+        .byte   8                       # DW_FORM_string
+        .byte   110                     # DW_AT_linkage_name
+        .byte   8                       # DW_FORM_string
+        .byte   82                      # DW_AT_entry_pc
+        .byte   1                       # DW_FORM_addr
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+
+        .byte   4                       # Abbreviation Code
+        .byte   57                      # DW_TAG_namespace
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   3                       # DW_AT_name
+        .byte   8                       # DW_FORM_string
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+
+        .byte   5                       # Abbreviation Code
+        .byte   52                      # DW_TAG_variable
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   8                       # DW_FORM_string
+        .byte   2                       # DW_AT_location
+        .byte   23                      # DW_FORM_sec_offset
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+
+        .byte   6                       # Abbreviation Code
+        .byte   57                      # DW_TAG_namespace
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+
+        .byte   7                       # Abbreviation Code
+        .byte   29                      # DW_TAG_inlined_subroutine
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   8                       # DW_FORM_string
+        .byte   17                      # DW_AT_low_pc
+        .byte   1                       # DW_FORM_addr
+        .byte   18                      # DW_AT_high_pc
+        .byte   1                       # DW_FORM_addr
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+
+        .byte   8                       # Abbreviation Code
+        .byte   10                      # DW_TAG_label
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   8                       # DW_FORM_string
+        .byte   82                      # DW_AT_entry_pc
+        .byte   1                       # DW_FORM_addr
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+
+        .byte   0                       # EOM(3)
+        .section        .debug_info,"", at progbits
+
+.Lcu_begin0:
+        .long   .Lcu_end0-.Lcu_start0   # Length of Unit
+.Lcu_start0:
+        .short  4                       # DWARF version number
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   8                       # Address Size (in bytes)
+        .byte   1                       # Abbrev [1] DW_TAG_compile_unit
+        .asciz  "hand-written DWARF"    # DW_AT_producer
+        .quad   0x0                     # DW_AT_low_pc
+        .long   0x100                   # DW_AT_high_pc
+
+        .byte   4                       # Abbrev [4] DW_TAG_namespace
+        .asciz  "namesp"                # DW_AT_name
+        .byte   2                       # Abbrev [2] DW_TAG_variable
+        .asciz  "var_block_addr"        # DW_AT_name
+        .byte   9                       # DW_AT_location
+        .byte   3                       # DW_OP_addr
+        .quad   0x47
+        .byte   0                       # End Of Children Mark
+
+        .byte   6                       # Abbrev [6] DW_TAG_namespace
+        .byte   5                       # Abbrev [5] DW_TAG_variable
+        .asciz  "var_loc_addr"          # DW_AT_name
+        .long   .Ldebug_loc0            # DW_AT_location
+        .byte   0                       # End Of Children Mark
+
+        .byte   2                       # Abbrev [2] DW_TAG_variable
+        .asciz  "var_loc_tls"           # DW_AT_name
+        .byte   1                       # DW_AT_location
+        .byte   0x9b                    # DW_OP_form_tls_address
+
+        .byte   2                       # Abbrev [2] DW_TAG_variable
+        .asciz  "var_loc_gnu_tls"       # DW_AT_name
+        .byte   1                       # DW_AT_location
+        .byte   0xe0                    # DW_OP_GNU_push_tls_address
+
+        .byte   3                       # Abbrev [3] DW_TAG_subprogram
+        .asciz  "fun_name"              # DW_AT_name
+        .asciz  "_Z8fun_name"           # DW_AT_linkage_name
+        .quad   0x47                    # DW_AT_entry_pc
+        .byte   7                       # Abbrev [7] DW_TAG_inlined_subroutine
+        .asciz  "fun_inline"            # DW_AT_name
+        .quad   0x48                    # DW_AT_low_pc
+        .quad   0x49                    # DW_AT_high_pc
+        .byte   8                       # Abbrev [8] DW_TAG_label
+        .asciz  "label"                 # DW_AT_name
+        .quad   0x4a                    # DW_AT_entry_pc
+        .byte   0                       # End Of Children Mark
+
+        .byte   0                       # End Of Children Mark
+.Lcu_end0:
+
+        .section        .debug_names,"", at progbits
+        .long   .Lnames_end0-.Lnames_start0 # Header: contribution length
+.Lnames_start0:
+        .short  5                       # Header: version
+        .short  0                       # Header: padding
+        .long   1                       # Header: compilation unit count
+        .long   0                       # Header: local type unit count
+        .long   0                       # Header: foreign type unit count
+        .long   0                       # Header: bucket count
+        .long   0                       # Header: name count
+        .long   .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size
+        .long   0                       # Header: augmentation length
+        .long   .Lcu_begin0             # Compilation unit 0
+.Lnames_abbrev_start0:
+        .byte   0                       # End of abbrev list
+.Lnames_abbrev_end0:
+.Lnames_entries0:
+.Lnames_end0:
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s
index b16f8658f87eca..9886968fdab996 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s
@@ -177,4 +177,3 @@
 .Lnames_abbrev_end0:
 .Lnames_entries0:
 .Lnames_end0:
-

>From 63113cf699762747ee56a064fdf75d26dde7b5cb Mon Sep 17 00:00:00 2001
From: David Young <davidayoung at meta.com>
Date: Wed, 29 Jan 2025 12:26:55 -0800
Subject: [PATCH 2/3] Add test for dwarf verification JSON output (2/2)

---
 .../X86/debug-names-verify-completeness.s     |  1 +
 .../X86/dwarf-verify-good-json-output.s       | 32 +++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/dwarf-verify-good-json-output.s

diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s
index 9886968fdab996..b16f8658f87eca 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness.s
@@ -177,3 +177,4 @@
 .Lnames_abbrev_end0:
 .Lnames_entries0:
 .Lnames_end0:
+
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/dwarf-verify-good-json-output.s b/llvm/test/tools/llvm-dwarfdump/X86/dwarf-verify-good-json-output.s
new file mode 100644
index 00000000000000..6f4b1cdedf83ab
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/dwarf-verify-good-json-output.s
@@ -0,0 +1,32 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - | llvm-dwarfdump -verify --verify-json=%t.json -
+# RUN: FileCheck %s --input-file %t.json
+
+# CHECK: {"error-categories":{},"error-count":0}
+
+# This test is meant to verify that the -verify option
+# in llvm-dwarfdump doesn't produce any .apple_names related
+# output when there's no such section in the object.
+# The test was manually modified to exclude the
+# .apple_names section from the apple_names_verify_num_atoms.s
+# test file in the same directory.
+
+  .section  __TEXT,__text,regular,pure_instructions
+  .file 1 "basic.c"
+  .comm _i,4,2                  ## @i
+  .comm _j,4,2                  ## @j
+  .section  __DWARF,__debug_str,regular,debug
+Linfo_string:
+  .asciz  "Apple LLVM version 8.1.0 (clang-802.0.35)" ## string offset=0
+  .asciz  "basic.c"               ## string offset=42
+  .asciz  "/Users/sgravani/Development/tests" ## string offset=50
+  .asciz  "i"                     ## string offset=84
+  .asciz  "int"                   ## string offset=86
+  .asciz  "j"                     ## string offset=90
+
+  .section  __DWARF,__debug_info,regular,debug
+Lsection_info:
+
+.subsections_via_symbols
+  .section  __DWARF,__debug_line,regular,debug
+Lsection_line:
+Lline_table_start0:

>From ebf1cab2db2d8e26b014372147b3644f52a1ab4d Mon Sep 17 00:00:00 2001
From: David Young <davidayoung at meta.com>
Date: Thu, 30 Jan 2025 05:54:39 -0800
Subject: [PATCH 3/3] Modify dwarf verification JSON to include detailed counts
 by sub-category

---
 .../llvm/DebugInfo/DWARF/DWARFVerifier.h      | 15 +++++-
 llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp    | 46 +++++++++++++++----
 ...g-names-verify-completeness-json-output.s} |  2 +-
 3 files changed, 51 insertions(+), 12 deletions(-)
 rename llvm/test/tools/llvm-dwarfdump/X86/{debug-names-verify--completeness-json-output.s => debug-names-verify-completeness-json-output.s} (98%)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index 7b51bb63cd15ba..6e79619e156cb6 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -30,9 +30,15 @@ class DWARFDebugAbbrev;
 class DataExtractor;
 struct DWARFSection;
 
+struct AggregationData {
+  unsigned OverallCount;
+  std::map<std::string, unsigned> DetailedCounts;
+  AggregationData() = default;
+};
+
 class OutputCategoryAggregator {
 private:
-  std::map<std::string, unsigned> Aggregation;
+  std::map<std::string, AggregationData> Aggregation;
   bool IncludeDetail;
 
 public:
@@ -40,8 +46,13 @@ class OutputCategoryAggregator {
       : IncludeDetail(includeDetail) {}
   void ShowDetail(bool showDetail) { IncludeDetail = showDetail; }
   size_t GetNumCategories() const { return Aggregation.size(); }
-  void Report(StringRef s, std::function<void()> detailCallback);
+  void Report(StringRef category, std::function<void()> detailCallback);
+  void Report(StringRef category, StringRef sub_category,
+              std::function<void()> detailCallback);
   void EnumerateResults(std::function<void(StringRef, unsigned)> handleCounts);
+  void EnumerateDetailedResultsFor(
+      StringRef category,
+      std::function<void(StringRef, unsigned)> handleCounts);
 };
 
 /// A class that verifies DWARF debug information given a DWARF Context.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 8bf513538de7c7..362461114b95e6 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -1942,12 +1942,14 @@ unsigned DWARFVerifier::verifyNameIndexCompleteness(
     if (none_of(NI.equal_range(Name), [&](const DWARFDebugNames::Entry &E) {
           return E.getDIEUnitOffset() == DieUnitOffset;
         })) {
-      ErrorCategory.Report("Name Index DIE entry missing name", [&]() {
-        error() << formatv(
-            "Name Index @ {0:x}: Entry for DIE @ {1:x} ({2}) with "
-            "name {3} missing.\n",
-            NI.getUnitOffset(), Die.getOffset(), Die.getTag(), Name);
-      });
+      ErrorCategory.Report(
+          "Name Index DIE entry missing name",
+          llvm::dwarf::TagString(Die.getTag()), [&]() {
+            error() << formatv(
+                "Name Index @ {0:x}: Entry for DIE @ {1:x} ({2}) with "
+                "name {3} missing.\n",
+                NI.getUnitOffset(), Die.getOffset(), Die.getTag(), Name);
+          });
       ++NumErrors;
     }
   }
@@ -2169,15 +2171,35 @@ bool DWARFVerifier::verifyDebugStrOffsets(
 
 void OutputCategoryAggregator::Report(
     StringRef s, std::function<void(void)> detailCallback) {
-  Aggregation[std::string(s)]++;
+  this->Report(s, "", detailCallback);
+}
+
+void OutputCategoryAggregator::Report(
+    StringRef category, StringRef sub_category,
+    std::function<void(void)> detailCallback) {
+  std::string category_str = std::string(category);
+  AggregationData *Agg = &Aggregation[category_str];
+  Agg->OverallCount++;
+  if (!sub_category.empty()) {
+    Agg->DetailedCounts[std::string(sub_category)]++;
+  }
   if (IncludeDetail)
     detailCallback();
 }
 
 void OutputCategoryAggregator::EnumerateResults(
     std::function<void(StringRef, unsigned)> handleCounts) {
-  for (auto &&[name, count] : Aggregation) {
-    handleCounts(name, count);
+  for (auto &&[name, aggData] : Aggregation) {
+    handleCounts(name, aggData.OverallCount);
+  }
+}
+void OutputCategoryAggregator::EnumerateDetailedResultsFor(
+    StringRef category, std::function<void(StringRef, unsigned)> handleCounts) {
+  auto Agg = Aggregation.find(std::string(category));
+  if (Agg != Aggregation.end()) {
+    for (auto &&[name, count] : Agg->second.DetailedCounts) {
+      handleCounts(name, count);
+    }
   }
 }
 
@@ -2204,6 +2226,12 @@ void DWARFVerifier::summarize() {
     ErrorCategory.EnumerateResults([&](StringRef Category, unsigned Count) {
       llvm::json::Object Val;
       Val.try_emplace("count", Count);
+      llvm::json::Object Details;
+      ErrorCategory.EnumerateDetailedResultsFor(
+          Category, [&](StringRef SubCategory, unsigned SubCount) {
+            Details.try_emplace(SubCategory, SubCount);
+          });
+      Val.try_emplace("details", std::move(Details));
       Categories.try_emplace(Category, std::move(Val));
       ErrorCount += Count;
     });
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify--completeness-json-output.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness-json-output.s
similarity index 98%
rename from llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify--completeness-json-output.s
rename to llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness-json-output.s
index c4b7ffe1d2d8e7..9c9658308c2f90 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify--completeness-json-output.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-completeness-json-output.s
@@ -1,7 +1,7 @@
 # RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj -o - | not llvm-dwarfdump -verify --verify-json=%t.json -
 # RUN: FileCheck %s --input-file %t.json
 
-# CHECK: {"error-categories":{"Name Index DIE entry missing name":{"count":10}},"error-count":10}
+# CHECK: {"error-categories":{"Name Index DIE entry missing name":{"count":10,"details":{"DW_TAG_inlined_subroutine":1,"DW_TAG_label":1,"DW_TAG_namespace":2,"DW_TAG_subprogram":2,"DW_TAG_variable":4}}},"error-count":10}
 # CHECK-NOT: error: Name Index @ 0x0: Entry for DIE @ {{.*}} (DW_TAG_variable) with name var_block_addr missing.
 
         .section        .debug_loc,"", at progbits



More information about the llvm-commits mailing list