[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)

Stephen Tozer via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jun 11 05:07:19 PDT 2025


https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/143594

>From 4bbd28b23847c069445d9babe9aa8a8aac5036c1 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 10 Jun 2025 20:02:36 +0100
Subject: [PATCH] [DLCov] Origin-Tracking: Add debugify support

---
 llvm/lib/Transforms/Utils/Debugify.cpp      | 80 ++++++++++++++++++---
 llvm/utils/llvm-original-di-preservation.py | 24 ++++---
 2 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 729813a92f516..01ed9de51c0b2 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -15,7 +15,10 @@
 
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/InstIterator.h"
@@ -28,6 +31,11 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/JSON.h"
 #include <optional>
+#if LLVM_ENABLE_DEBUGLOC_ORIGIN_TRACKING
+// We need the Signals header to operate on stacktraces if we're using DebugLoc
+// origin-tracking.
+#include "llvm/Support/Signals.h"
+#endif
 
 #define DEBUG_TYPE "debugify"
 
@@ -59,6 +67,49 @@ cl::opt<Level> DebugifyLevel(
 
 raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
 
+#if LLVM_ENABLE_DEBUGLOC_ORIGIN_TRACKING
+// These maps refer to addresses in this instance of LLVM, so we can reuse them
+// everywhere - therefore, we store them at file scope.
+static DenseMap<void *, std::string> SymbolizedAddrs;
+static DenseSet<void *> UnsymbolizedAddrs;
+
+std::string symbolizeStackTrace(const Instruction *I) {
+  // We flush the set of unsymbolized addresses at the latest possible moment,
+  // i.e. now.
+  if (!UnsymbolizedAddrs.empty()) {
+    sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs);
+    UnsymbolizedAddrs.clear();
+  }
+  auto OriginStackTraces = I->getDebugLoc().getOriginStackTraces();
+  std::string Result;
+  raw_string_ostream OS(Result);
+  for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) {
+    if (TraceIdx != 0)
+      OS << "========================================\n";
+    auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx];
+    for (int Frame = 0; Frame < Depth; ++Frame) {
+      assert(SymbolizedAddrs.contains(StackTrace[Frame]) &&
+             "Expected each address to have been symbolized.");
+      OS << right_justify(formatv("#{0}", Frame).str(), std::log10(Depth) + 2)
+         << ' ' << SymbolizedAddrs[StackTrace[Frame]];
+    }
+  }
+  return Result;
+}
+void collectStackAddresses(Instruction &I) {
+  auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces();
+  for (auto &[Depth, StackTrace] : OriginStackTraces) {
+    for (int Frame = 0; Frame < Depth; ++Frame) {
+      void *Addr = StackTrace[Frame];
+      if (!SymbolizedAddrs.contains(Addr))
+        UnsymbolizedAddrs.insert(Addr);
+    }
+  }
+}
+#else
+void collectStackAddresses(Instruction &I) {}
+#endif // LLVM_ENABLE_DEBUGLOC_ORIGIN_TRACKING
+
 uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
   return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0;
 }
@@ -379,6 +430,8 @@ bool llvm::collectDebugInfoMetadata(Module &M,
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
         DebugInfoBeforePass.InstToDelete.insert({&I, &I});
 
+        // Track the addresses to symbolize, if the feature is enabled.
+        collectStackAddresses(I);
         DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)});
       }
     }
@@ -454,14 +507,23 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
     auto BBName = BB->hasName() ? BB->getName() : "no-name";
     auto InstName = Instruction::getOpcodeName(Instr->getOpcode());
 
+    auto CreateJSONBugEntry = [&](const char *Action) {
+      Bugs.push_back(llvm::json::Object({
+          {"metadata", "DILocation"},
+          {"fn-name", FnName.str()},
+          {"bb-name", BBName.str()},
+          {"instr", InstName},
+          {"action", Action},
+#if LLVM_ENABLE_DEBUGLOC_ORIGIN_TRACKING
+          {"origin", symbolizeStackTrace(Instr)},
+#endif
+      }));
+    };
+
     auto InstrIt = DILocsBefore.find(Instr);
     if (InstrIt == DILocsBefore.end()) {
       if (ShouldWriteIntoJSON)
-        Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
-                                           {"fn-name", FnName.str()},
-                                           {"bb-name", BBName.str()},
-                                           {"instr", InstName},
-                                           {"action", "not-generate"}}));
+        CreateJSONBugEntry("not-generate");
       else
         dbg() << "WARNING: " << NameOfWrappedPass
               << " did not generate DILocation for " << *Instr
@@ -474,11 +536,7 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
       // If the instr had the !dbg attached before the pass, consider it as
       // a debug info issue.
       if (ShouldWriteIntoJSON)
-        Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
-                                           {"fn-name", FnName.str()},
-                                           {"bb-name", BBName.str()},
-                                           {"instr", InstName},
-                                           {"action", "drop"}}));
+        CreateJSONBugEntry("drop");
       else
         dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of "
               << *Instr << " (BB: " << BBName << ", Fn: " << FnName
@@ -622,6 +680,8 @@ bool llvm::checkDebugInfoMetadata(Module &M,
 
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
 
+        // Track the addresses to symbolize, if the feature is enabled.
+        collectStackAddresses(I);
         DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)});
       }
     }
diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py
index dc1fa518ca8e6..680b75691526c 100755
--- a/llvm/utils/llvm-original-di-preservation.py
+++ b/llvm/utils/llvm-original-di-preservation.py
@@ -13,14 +13,15 @@
 
 
 class DILocBug:
-    def __init__(self, action, bb_name, fn_name, instr):
+    def __init__(self, origin, action, bb_name, fn_name, instr):
+        self.origin = origin
         self.action = action
         self.bb_name = bb_name
         self.fn_name = fn_name
         self.instr = instr
 
     def __str__(self):
-        return self.action + self.bb_name + self.fn_name + self.instr
+        return self.action + self.bb_name + self.fn_name + self.instr + self.origin
 
 
 class DISPBug:
@@ -86,6 +87,7 @@ def generate_html_report(
         "Function Name",
         "Basic Block Name",
         "Action",
+        "Origin",
     ]
 
     for column in header_di_loc:
@@ -112,6 +114,9 @@ def generate_html_report(
                 row.append(x.fn_name)
                 row.append(x.bb_name)
                 row.append(x.action)
+                row.append(
+                    f"<details><summary>View Origin StackTrace</summary><pre>{x.origin}</pre></details>"
+                )
                 row.append("    </tr>\n")
             # Dump the bugs info into the table.
             for column in row:
@@ -428,9 +433,9 @@ def Main():
         sys.exit(1)
 
     # Use the defaultdict in order to make multidim dicts.
-    di_location_bugs = defaultdict(lambda: defaultdict(dict))
-    di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
-    di_variable_bugs = defaultdict(lambda: defaultdict(dict))
+    di_location_bugs = defaultdict(lambda: defaultdict(list))
+    di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
+    di_variable_bugs = defaultdict(lambda: defaultdict(list))
 
     # Use the ordered dict to make a summary.
     di_location_bugs_summary = OrderedDict()
@@ -470,9 +475,9 @@ def Main():
                 skipped_lines += 1
                 continue
 
-            di_loc_bugs = []
-            di_sp_bugs = []
-            di_var_bugs = []
+            di_loc_bugs = di_location_bugs[bugs_file][bugs_pass]
+            di_sp_bugs = di_subprogram_bugs[bugs_file][bugs_pass]
+            di_var_bugs = di_variable_bugs[bugs_file][bugs_pass]
 
             # Omit duplicated bugs.
             di_loc_set = set()
@@ -487,6 +492,7 @@ def Main():
 
                 if bugs_metadata == "DILocation":
                     try:
+                        origin = bug["origin"]
                         action = bug["action"]
                         bb_name = bug["bb-name"]
                         fn_name = bug["fn-name"]
@@ -494,7 +500,7 @@ def Main():
                     except:
                         skipped_bugs += 1
                         continue
-                    di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
+                    di_loc_bug = DILocBug(origin, action, bb_name, fn_name, instr)
                     if not str(di_loc_bug) in di_loc_set:
                         di_loc_set.add(str(di_loc_bug))
                         if opts.compress:



More information about the llvm-branch-commits mailing list