[llvm] [DwarfDump] Add new set of line-table-related statistics to llvm-dwarfdump (PR #93289)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 7 01:17:06 PDT 2024


https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/93289

>From 8a0bb3b1401652b99f23d14d335e370fd9113eef Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Thu, 23 May 2024 18:59:18 +0100
Subject: [PATCH 1/4] [DwarfDump] Add new set of line-related statistics to
 llvm-dwarfdump

This patch adds a new set of statistics to llvm-dwarfdump that provide
additional information about .debug_line regarding the number of bytes
covered by the line table (and how many of those are covered by line 0
entries), and the number of entries within the table and how many of those
are is_stmt, unique, or unique and non-line-0 (where "uniqueness" is based
on file, line, and column only).

Collectively these give a little more insight into the state of debug line
information, whereas most of the existing dwarfdump statistics are oriented
towards variables. I've added all of the stats that were useful to some
degree, but I think the most generally useful stat is "unique line entries",
since it gives the most straightforward indication of regressions: when the
number goes down, it means that fewer source lines are reachable in the
program.
---
 .../test/tools/llvm-dwarfdump/X86/locstats.ll |  6 ++
 llvm/tools/llvm-dwarfdump/Statistics.cpp      | 78 +++++++++++++++++++
 2 files changed, 84 insertions(+)

diff --git a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
index f850119acb000..415f092dc7da7 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
+++ b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
@@ -89,6 +89,12 @@
 ; CHECK-NEXT: "#local vars - entry values with [80%,90%) of parent scope covered by DW_AT_location": 1,
 ; CHECK-NEXT: "#local vars - entry values with [90%,100%) of parent scope covered by DW_AT_location": 0,
 ; CHECK-NEXT: "#local vars - entry values with 100% of parent scope covered by DW_AT_location": 1
+; CHECK-NEXT: "#bytes with line information": 51,
+; CHECK-NEXT: "#bytes with line-0 locations": 3,
+; CHECK-NEXT: "#line entries": 7,
+; CHECK-NEXT: "#line entries marked is_stmt": 5,
+; CHECK-NEXT: "#line entries (unique)": 6,
+; CHECK-NEXT: "#line entries (unique non-0)": 5
 
 ; The source code of the test case:
 ; extern void fn3(int *);
diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 96841c3c387bd..28e597ba8894f 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm-dwarfdump.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
@@ -188,6 +189,16 @@ struct LocationStats {
   /// Total number of local variables processed.
   SaturatingUINT64 NumVar = 0;
 };
+
+/// Holds accumulated debug line statistics across all CUs.
+struct LineStats {
+  SaturatingUINT64 NumBytes = 0;
+  SaturatingUINT64 NumLineZeroBytes = 0;
+  SaturatingUINT64 NumEntries = 0;
+  SaturatingUINT64 NumIsStmtEntries = 0;
+  SaturatingUINT64 NumUniqueEntries = 0;
+  SaturatingUINT64 NumUniqueNonZeroEntries = 0;
+};
 } // namespace
 
 /// Collect debug location statistics for one DIE.
@@ -848,6 +859,7 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
   StringRef FormatName = Obj.getFileFormatName();
   GlobalStats GlobalStats;
   LocationStats LocStats;
+  LineStats LnStats;
   StringMap<PerFunctionStats> Statistics;
   // This variable holds variable information for functions with
   // abstract_origin globally, across all CUs.
@@ -856,6 +868,12 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
   // abstract_origin.
   FunctionDIECUTyMap AbstractOriginFnCUs;
   CrossCUReferencingDIELocationTy CrossCUReferencesToBeResolved;
+  // Line, Col, File
+  using LineTuple = std::tuple<uint32_t, uint16_t, uint16_t>;
+  SmallVector<std::string> Files;
+  DenseSet<LineTuple> UniqueLines;
+  DenseSet<LineTuple> UniqueNonZeroLines;
+
   for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units()) {
     if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false)) {
       // This variable holds variable information for functions with
@@ -882,8 +900,58 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
         CrossCUReferencesToBeResolved.push_back(
             DIELocation(CUDie.getDwarfUnit(), CrossCUReferencingDIEOffset));
     }
+    if (const auto *LineTable = DICtx.getLineTableForUnit(CU.get())) {
+      auto LastFileIdxOpt = LineTable->getLastValidFileIndex();
+      // Each CU has its own file index; in order to track unique line entries
+      // across CUs, we therefore need to map each CU file index to a global
+      // file index, which we store here.
+      DenseMap<uint64_t, uint16_t> CUFileMapping;
+      if (LastFileIdxOpt) {
+        std::string File;
+        for (uint64_t FileIdx = 0; FileIdx <= *LastFileIdxOpt; ++FileIdx) {
+          if (LineTable->getFileNameByIndex(
+                  FileIdx, CU->getCompilationDir(),
+                  DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+                  File)) {
+            auto ExistingFile = llvm::find(Files, File);
+            if (ExistingFile != Files.end()) {
+              CUFileMapping[FileIdx] =
+                  std::distance(Files.begin(), ExistingFile);
+            } else {
+              CUFileMapping[FileIdx] = Files.size();
+              Files.push_back(File);
+            }
+          }
+        }
+      }
+      for (auto Seq : LineTable->Sequences) {
+        LnStats.NumBytes += Seq.HighPC - Seq.LowPC;
+        // Ignore the `end_sequence` entry, since it's not interesting for us.
+        LnStats.NumEntries += Seq.LastRowIndex - Seq.FirstRowIndex - 1;
+        for (size_t RowIdx = Seq.FirstRowIndex; RowIdx < Seq.LastRowIndex - 1;
+             ++RowIdx) {
+          auto Entry = LineTable->Rows[RowIdx];
+          if (Entry.IsStmt)
+            LnStats.NumIsStmtEntries += 1;
+          assert(CUFileMapping.contains(Entry.File) &&
+                 "Should have been collected earlier!");
+          uint16_t MappedFile = CUFileMapping[Entry.File];
+          UniqueLines.insert({Entry.Line, Entry.Column, MappedFile});
+          if (Entry.Line != 0) {
+            UniqueNonZeroLines.insert({Entry.Line, Entry.Column, MappedFile});
+          } else {
+            auto EntryStartAddress = Entry.Address.Address;
+            auto EntryEndAddress = LineTable->Rows[RowIdx + 1].Address.Address;
+            LnStats.NumLineZeroBytes += EntryEndAddress - EntryStartAddress;
+          }
+        }
+      }
+    }
   }
 
+  LnStats.NumUniqueEntries = UniqueLines.size();
+  LnStats.NumUniqueNonZeroEntries = UniqueNonZeroLines.size();
+
   /// Resolve CrossCU references.
   collectZeroLocCovForVarsWithCrossCUReferencingAbstractOrigin(
       LocStats, AbstractOriginFnCUs, GlobalAbstractOriginFnInfo,
@@ -1043,6 +1111,16 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
   printLocationStats(J, "#local vars", LocStats.LocalVarLocStats);
   printLocationStats(J, "#local vars - entry values",
                      LocStats.LocalVarNonEntryValLocStats);
+
+  // Print line statistics for the object file.
+  printDatum(J, "#bytes with line information", LnStats.NumBytes.Value);
+  printDatum(J, "#bytes with line-0 locations", LnStats.NumLineZeroBytes.Value);
+  printDatum(J, "#line entries", LnStats.NumEntries.Value);
+  printDatum(J, "#line entries marked is_stmt", LnStats.NumIsStmtEntries.Value);
+  printDatum(J, "#line entries (unique)", LnStats.NumUniqueEntries.Value);
+  printDatum(J, "#line entries (unique non-0)",
+             LnStats.NumUniqueNonZeroEntries.Value);
+
   J.objectEnd();
   OS << '\n';
   LLVM_DEBUG(

>From e884612c9eddd5a80426167083bb088d5e5db4b0 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Fri, 24 May 2024 14:14:58 +0100
Subject: [PATCH 2/4] Address review comments

---
 llvm/tools/llvm-dwarfdump/Statistics.cpp | 40 +++++++++++++-----------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 28e597ba8894f..b40ff208b99b3 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -868,7 +868,9 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
   // abstract_origin.
   FunctionDIECUTyMap AbstractOriginFnCUs;
   CrossCUReferencingDIELocationTy CrossCUReferencesToBeResolved;
-  // Line, Col, File
+  // Tuple representing a single source code position in the line table. Fields
+  // are respectively: Line, Col, File, where 'File' is an index into the Files
+  // vector below.
   using LineTuple = std::tuple<uint32_t, uint16_t, uint16_t>;
   SmallVector<std::string> Files;
   DenseSet<LineTuple> UniqueLines;
@@ -900,31 +902,31 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
         CrossCUReferencesToBeResolved.push_back(
             DIELocation(CUDie.getDwarfUnit(), CrossCUReferencingDIEOffset));
     }
-    if (const auto *LineTable = DICtx.getLineTableForUnit(CU.get())) {
-      auto LastFileIdxOpt = LineTable->getLastValidFileIndex();
+    const auto *LineTable = DICtx.getLineTableForUnit(CU.get());
+    std::optional<uint64_t> LastFileIdxOpt;
+    if (LineTable)
+      LastFileIdxOpt = LineTable->getLastValidFileIndex();
+    if (LastFileIdxOpt) {
       // Each CU has its own file index; in order to track unique line entries
       // across CUs, we therefore need to map each CU file index to a global
       // file index, which we store here.
       DenseMap<uint64_t, uint16_t> CUFileMapping;
-      if (LastFileIdxOpt) {
-        std::string File;
-        for (uint64_t FileIdx = 0; FileIdx <= *LastFileIdxOpt; ++FileIdx) {
-          if (LineTable->getFileNameByIndex(
-                  FileIdx, CU->getCompilationDir(),
-                  DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
-                  File)) {
-            auto ExistingFile = llvm::find(Files, File);
-            if (ExistingFile != Files.end()) {
-              CUFileMapping[FileIdx] =
-                  std::distance(Files.begin(), ExistingFile);
-            } else {
-              CUFileMapping[FileIdx] = Files.size();
-              Files.push_back(File);
-            }
+      std::string File;
+      for (uint64_t FileIdx = 0; FileIdx <= *LastFileIdxOpt; ++FileIdx) {
+        if (LineTable->getFileNameByIndex(
+                FileIdx, CU->getCompilationDir(),
+                DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+                File)) {
+          auto ExistingFile = llvm::find(Files, File);
+          if (ExistingFile != Files.end()) {
+            CUFileMapping[FileIdx] = std::distance(Files.begin(), ExistingFile);
+          } else {
+            CUFileMapping[FileIdx] = Files.size();
+            Files.push_back(File);
           }
         }
       }
-      for (auto Seq : LineTable->Sequences) {
+      for (const auto &Seq : LineTable->Sequences) {
         LnStats.NumBytes += Seq.HighPC - Seq.LowPC;
         // Ignore the `end_sequence` entry, since it's not interesting for us.
         LnStats.NumEntries += Seq.LastRowIndex - Seq.FirstRowIndex - 1;

>From f36d004c1160af7ed599c736a1fb5c84128fe41d Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Fri, 24 May 2024 18:32:29 +0100
Subject: [PATCH 3/4] Address further review comments

---
 llvm/test/tools/llvm-dwarfdump/X86/locstats.ll | 6 ++++++
 llvm/tools/llvm-dwarfdump/Statistics.cpp       | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
index 415f092dc7da7..9e36524e0fdcf 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
+++ b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
@@ -1,5 +1,11 @@
 ; RUN: llc -debug-entry-values %s -o - -filetype=obj \
 ; RUN:   | llvm-dwarfdump -statistics - | FileCheck %s
+; RUN: llc -debug-entry-values --dwarf-version=4 %s -o - -filetype=obj \
+; RUN:   | llvm-dwarfdump -statistics - | FileCheck %s
+; RUN: llc -debug-entry-values --dwarf-version=3 %s -o - -filetype=obj \
+; RUN:   | llvm-dwarfdump -statistics - | FileCheck %s
+; RUN: llc -debug-entry-values --dwarf-version=2 %s -o - -filetype=obj \
+; RUN:   | llvm-dwarfdump -statistics - | FileCheck %s
 
 ; CHECK:      "sum_all_variables(#bytes in parent scope covered by DW_OP_entry_value)": 5,
 ; CHECK-NEXT: "sum_all_params(#bytes in parent scope)": 20,
diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index b40ff208b99b3..c46049bad666e 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -911,8 +911,8 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
       // across CUs, we therefore need to map each CU file index to a global
       // file index, which we store here.
       DenseMap<uint64_t, uint16_t> CUFileMapping;
-      std::string File;
       for (uint64_t FileIdx = 0; FileIdx <= *LastFileIdxOpt; ++FileIdx) {
+        std::string File;
         if (LineTable->getFileNameByIndex(
                 FileIdx, CU->getCompilationDir(),
                 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,

>From bfb20ed662a6d56baa0af25f35d502a94728f2c7 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Fri, 7 Jun 2024 09:16:48 +0100
Subject: [PATCH 4/4] Change name of is_stmt line entries stat

---
 llvm/test/tools/llvm-dwarfdump/X86/locstats.ll | 2 +-
 llvm/tools/llvm-dwarfdump/Statistics.cpp       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
index 9e36524e0fdcf..cb5908976fb6c 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
+++ b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
@@ -98,7 +98,7 @@
 ; CHECK-NEXT: "#bytes with line information": 51,
 ; CHECK-NEXT: "#bytes with line-0 locations": 3,
 ; CHECK-NEXT: "#line entries": 7,
-; CHECK-NEXT: "#line entries marked is_stmt": 5,
+; CHECK-NEXT: "#line entries (is_stmt)": 5,
 ; CHECK-NEXT: "#line entries (unique)": 6,
 ; CHECK-NEXT: "#line entries (unique non-0)": 5
 
diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index c46049bad666e..10ce6093909b6 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -1118,7 +1118,7 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
   printDatum(J, "#bytes with line information", LnStats.NumBytes.Value);
   printDatum(J, "#bytes with line-0 locations", LnStats.NumLineZeroBytes.Value);
   printDatum(J, "#line entries", LnStats.NumEntries.Value);
-  printDatum(J, "#line entries marked is_stmt", LnStats.NumIsStmtEntries.Value);
+  printDatum(J, "#line entries (is_stmt)", LnStats.NumIsStmtEntries.Value);
   printDatum(J, "#line entries (unique)", LnStats.NumUniqueEntries.Value);
   printDatum(J, "#line entries (unique non-0)",
              LnStats.NumUniqueNonZeroEntries.Value);



More information about the llvm-commits mailing list