[llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 2 07:29:58 PDT 2025


https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/143594

>From 51a62f49deca9216c9058971babbb50aef3a1313 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 10 Jun 2025 20:00:51 +0100
Subject: [PATCH 1/2] [DLCov] Origin-Tracking: Core implementation

---
 llvm/include/llvm/IR/DebugLoc.h    | 49 +++++++++++++++++++++++++-----
 llvm/include/llvm/IR/Instruction.h |  2 +-
 llvm/lib/CodeGen/BranchFolding.cpp |  7 +++++
 llvm/lib/IR/DebugLoc.cpp           | 26 +++++++++++++++-
 4 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h
index 999e03b6374a5..6d79aa6b2aa01 100644
--- a/llvm/include/llvm/IR/DebugLoc.h
+++ b/llvm/include/llvm/IR/DebugLoc.h
@@ -27,6 +27,21 @@ namespace llvm {
   class Function;
 
 #if LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+  struct DbgLocOrigin {
+    static constexpr unsigned long MaxDepth = 16;
+    using StackTracesTy =
+        SmallVector<std::pair<int, std::array<void *, MaxDepth>>, 0>;
+    StackTracesTy StackTraces;
+    DbgLocOrigin(bool ShouldCollectTrace);
+    void addTrace();
+    const StackTracesTy &getOriginStackTraces() const { return StackTraces; };
+  };
+#else
+  struct DbgLocOrigin {
+    DbgLocOrigin(bool) {}
+  };
+#endif
   // Used to represent different "kinds" of DebugLoc, expressing that the
   // instruction it is part of is either normal and should contain a valid
   // DILocation, or otherwise describing the reason why the instruction does
@@ -55,22 +70,29 @@ namespace llvm {
     Temporary
   };
 
-  // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify
-  // to ignore intentionally-empty DebugLocs.
-  class DILocAndCoverageTracking : public TrackingMDNodeRef {
+  // Extends TrackingMDNodeRef to also store a DebugLocKind and Origin,
+  // allowing Debugify to ignore intentionally-empty DebugLocs and display the
+  // code responsible for generating unintentionally-empty DebugLocs.
+  // Currently we only need to track the Origin of this DILoc when using a
+  // DebugLoc that is not annotated (i.e. has DebugLocKind::Normal) and has a
+  // null DILocation, so only collect the origin stacktrace in those cases.
+  class DILocAndCoverageTracking : public TrackingMDNodeRef,
+                                   public DbgLocOrigin {
   public:
     DebugLocKind Kind;
     // Default constructor for empty DebugLocs.
     DILocAndCoverageTracking()
-        : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {}
-    // Valid or nullptr MDNode*, normal DebugLocKind.
+        : TrackingMDNodeRef(nullptr), DbgLocOrigin(true),
+          Kind(DebugLocKind::Normal) {}
+    // Valid or nullptr MDNode*, no annotative DebugLocKind.
     DILocAndCoverageTracking(const MDNode *Loc)
-        : TrackingMDNodeRef(const_cast<MDNode *>(Loc)),
+        : TrackingMDNodeRef(const_cast<MDNode *>(Loc)), DbgLocOrigin(!Loc),
           Kind(DebugLocKind::Normal) {}
     LLVM_ABI DILocAndCoverageTracking(const DILocation *Loc);
     // Explicit DebugLocKind, which always means a nullptr MDNode*.
     DILocAndCoverageTracking(DebugLocKind Kind)
-        : TrackingMDNodeRef(nullptr), Kind(Kind) {}
+        : TrackingMDNodeRef(nullptr),
+          DbgLocOrigin(Kind == DebugLocKind::Normal), Kind(Kind) {}
   };
   template <> struct simplify_type<DILocAndCoverageTracking> {
     using SimpleType = MDNode *;
@@ -187,6 +209,19 @@ namespace llvm {
 #endif // LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
     }
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+    const DbgLocOrigin::StackTracesTy &getOriginStackTraces() const {
+      return Loc.getOriginStackTraces();
+    }
+    DebugLoc getCopied() const {
+      DebugLoc NewDL = *this;
+      NewDL.Loc.addTrace();
+      return NewDL;
+    }
+#else
+    DebugLoc getCopied() const { return *this; }
+#endif
+
     /// Get the underlying \a DILocation.
     ///
     /// \pre !*this or \c isa<DILocation>(getAsMDNode()).
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 8e1ef24226789..ef382a9168f24 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -507,7 +507,7 @@ class Instruction : public User,
   LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const;
 
   /// Set the debug location information for this instruction.
-  void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); }
+  void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc).getCopied(); }
 
   /// Return the debug location for this node as a DebugLoc.
   const DebugLoc &getDebugLoc() const { return DbgLoc; }
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index ff9f0ff5d5bc3..3b3e7a418feb5 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -42,6 +42,7 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
@@ -933,7 +934,13 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
 
   // Sort by hash value so that blocks with identical end sequences sort
   // together.
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+  // If origin-tracking is enabled then MergePotentialElt is no longer a POD
+  // type, so we need std::sort instead.
+  std::sort(MergePotentials.begin(), MergePotentials.end());
+#else
   array_pod_sort(MergePotentials.begin(), MergePotentials.end());
+#endif
 
   // Walk through equivalence sets looking for actual exact matches.
   while (MergePotentials.size() > 1) {
diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp
index ffeeeb6f1e4b0..79c5b896f8f25 100644
--- a/llvm/lib/IR/DebugLoc.cpp
+++ b/llvm/lib/IR/DebugLoc.cpp
@@ -9,11 +9,35 @@
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/DebugInfo.h"
+
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#include "llvm/Support/Signals.h"
+
+namespace llvm {
+DbgLocOrigin::DbgLocOrigin(bool ShouldCollectTrace) {
+  if (!ShouldCollectTrace)
+    return;
+  auto &[Depth, StackTrace] = StackTraces.emplace_back();
+  Depth = sys::getStackTrace(StackTrace);
+}
+void DbgLocOrigin::addTrace() {
+  // We only want to add new stacktraces if we already have one: addTrace exists
+  // to provide more context to how missing DebugLocs have propagated through
+  // the program, but by design if there is no existing stacktrace then we have
+  // decided not to track this DebugLoc as being "missing".
+  if (StackTraces.empty())
+    return;
+  auto &[Depth, StackTrace] = StackTraces.emplace_back();
+  Depth = sys::getStackTrace(StackTrace);
+}
+} // namespace llvm
+#endif
+
 using namespace llvm;
 
 #if LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
 DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L)
-    : TrackingMDNodeRef(const_cast<DILocation *>(L)),
+    : TrackingMDNodeRef(const_cast<DILocation *>(L)), DbgLocOrigin(!L),
       Kind(DebugLocKind::Normal) {}
 #endif // LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
 

>From a4ef5c6a7e80ac1366b872ff075efa3fb6038a6b Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 10 Jun 2025 20:02:36 +0100
Subject: [PATCH 2/2] [DLCov] Origin-Tracking: Add debugify support

---
 llvm/include/llvm/Support/Signals.h         |  6 +-
 llvm/lib/Transforms/Utils/Debugify.cpp      | 81 ++++++++++++++++++---
 llvm/utils/llvm-original-di-preservation.py | 24 +++---
 3 files changed, 88 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h
index 5969a0a6b230b..21b425fffef53 100644
--- a/llvm/include/llvm/Support/Signals.h
+++ b/llvm/include/llvm/Support/Signals.h
@@ -26,10 +26,8 @@
 namespace llvm {
 // Typedefs that are convenient but only used by the stack-trace-collection code
 // added if DebugLoc origin-tracking is enabled.
-using AddressSet = DenseSet<void *, DenseMapInfo<void *, void>>;
-using SymbolizedAddressMap =
-    DenseMap<void *, SmallVector<std::string, 0>, DenseMapInfo<void *, void>,
-             detail::DenseMapPair<void *, SmallVector<std::string, 0>>>;
+using AddressSet = DenseSet<void *>;
+using SymbolizedAddressMap = DenseMap<void *, SmallVector<std::string, 0>>;
 } // namespace llvm
 #endif
 
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 5f70bc442d2f0..0b57c4a2f9dae 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/InstIterator.h"
@@ -28,6 +29,11 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/JSON.h"
 #include <optional>
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+// We need the Signals header to operate on stacktraces if we're using DebugLoc
+// origin-tracking.
+#include "llvm/Support/Signals.h"
+#endif
 
 #define DEBUG_TYPE "debugify"
 
@@ -59,6 +65,52 @@ cl::opt<Level> DebugifyLevel(
 
 raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+// These maps refer to addresses in this instance of LLVM, so we can reuse them
+// everywhere - therefore, we store them at file scope.
+static SymbolizedAddressMap SymbolizedAddrs;
+static AddressSet UnsymbolizedAddrs;
+
+std::string symbolizeStackTrace(const Instruction *I) {
+  // We flush the set of unsymbolized addresses at the latest possible moment,
+  // i.e. now.
+  if (!UnsymbolizedAddrs.empty()) {
+    sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs);
+    UnsymbolizedAddrs.clear();
+  }
+  auto OriginStackTraces = I->getDebugLoc().getOriginStackTraces();
+  std::string Result;
+  raw_string_ostream OS(Result);
+  for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) {
+    if (TraceIdx != 0)
+      OS << "========================================\n";
+    auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx];
+    unsigned VirtualFrameNo = 0;
+    for (int Frame = 0; Frame < Depth; ++Frame) {
+      assert(SymbolizedAddrs.contains(StackTrace[Frame]) &&
+             "Expected each address to have been symbolized.");
+      for (std::string &SymbolizedFrame : SymbolizedAddrs[StackTrace[Frame]]) {
+        OS << right_justify(formatv("#{0}", VirtualFrameNo++).str(), std::log10(Depth) + 2)
+          << ' ' << SymbolizedFrame << '\n';
+      }
+    }
+  }
+  return Result;
+}
+void collectStackAddresses(Instruction &I) {
+  auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces();
+  for (auto &[Depth, StackTrace] : OriginStackTraces) {
+    for (int Frame = 0; Frame < Depth; ++Frame) {
+      void *Addr = StackTrace[Frame];
+      if (!SymbolizedAddrs.contains(Addr))
+        UnsymbolizedAddrs.insert(Addr);
+    }
+  }
+}
+#else
+void collectStackAddresses(Instruction &I) {}
+#endif // LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+
 uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
   return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0;
 }
@@ -375,6 +427,8 @@ bool llvm::collectDebugInfoMetadata(Module &M,
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
         DebugInfoBeforePass.InstToDelete.insert({&I, &I});
 
+        // Track the addresses to symbolize, if the feature is enabled.
+        collectStackAddresses(I);
         DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)});
       }
     }
@@ -450,14 +504,23 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
     auto BBName = BB->hasName() ? BB->getName() : "no-name";
     auto InstName = Instruction::getOpcodeName(Instr->getOpcode());
 
+    auto CreateJSONBugEntry = [&](const char *Action) {
+      Bugs.push_back(llvm::json::Object({
+          {"metadata", "DILocation"},
+          {"fn-name", FnName.str()},
+          {"bb-name", BBName.str()},
+          {"instr", InstName},
+          {"action", Action},
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+          {"origin", symbolizeStackTrace(Instr)},
+#endif
+      }));
+    };
+
     auto InstrIt = DILocsBefore.find(Instr);
     if (InstrIt == DILocsBefore.end()) {
       if (ShouldWriteIntoJSON)
-        Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
-                                           {"fn-name", FnName.str()},
-                                           {"bb-name", BBName.str()},
-                                           {"instr", InstName},
-                                           {"action", "not-generate"}}));
+        CreateJSONBugEntry("not-generate");
       else
         dbg() << "WARNING: " << NameOfWrappedPass
               << " did not generate DILocation for " << *Instr
@@ -470,11 +533,7 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
       // If the instr had the !dbg attached before the pass, consider it as
       // a debug info issue.
       if (ShouldWriteIntoJSON)
-        Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
-                                           {"fn-name", FnName.str()},
-                                           {"bb-name", BBName.str()},
-                                           {"instr", InstName},
-                                           {"action", "drop"}}));
+        CreateJSONBugEntry("drop");
       else
         dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of "
               << *Instr << " (BB: " << BBName << ", Fn: " << FnName
@@ -612,6 +671,8 @@ bool llvm::checkDebugInfoMetadata(Module &M,
 
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
 
+        // Track the addresses to symbolize, if the feature is enabled.
+        collectStackAddresses(I);
         DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)});
       }
     }
diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py
index dc1fa518ca8e6..7a8752da00667 100755
--- a/llvm/utils/llvm-original-di-preservation.py
+++ b/llvm/utils/llvm-original-di-preservation.py
@@ -13,14 +13,15 @@
 
 
 class DILocBug:
-    def __init__(self, action, bb_name, fn_name, instr):
+    def __init__(self, origin, action, bb_name, fn_name, instr):
+        self.origin = origin
         self.action = action
         self.bb_name = bb_name
         self.fn_name = fn_name
         self.instr = instr
 
     def __str__(self):
-        return self.action + self.bb_name + self.fn_name + self.instr
+        return self.action + self.bb_name + self.fn_name + self.instr + self.origin
 
 
 class DISPBug:
@@ -86,6 +87,7 @@ def generate_html_report(
         "Function Name",
         "Basic Block Name",
         "Action",
+        "Origin",
     ]
 
     for column in header_di_loc:
@@ -112,6 +114,9 @@ def generate_html_report(
                 row.append(x.fn_name)
                 row.append(x.bb_name)
                 row.append(x.action)
+                row.append(
+                    f"<details><summary>View Origin StackTrace</summary><pre>{x.origin}</pre></details>"
+                )
                 row.append("    </tr>\n")
             # Dump the bugs info into the table.
             for column in row:
@@ -428,9 +433,9 @@ def Main():
         sys.exit(1)
 
     # Use the defaultdict in order to make multidim dicts.
-    di_location_bugs = defaultdict(lambda: defaultdict(dict))
-    di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
-    di_variable_bugs = defaultdict(lambda: defaultdict(dict))
+    di_location_bugs = defaultdict(lambda: defaultdict(list))
+    di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
+    di_variable_bugs = defaultdict(lambda: defaultdict(list))
 
     # Use the ordered dict to make a summary.
     di_location_bugs_summary = OrderedDict()
@@ -470,9 +475,9 @@ def Main():
                 skipped_lines += 1
                 continue
 
-            di_loc_bugs = []
-            di_sp_bugs = []
-            di_var_bugs = []
+            di_loc_bugs = di_location_bugs[bugs_file][bugs_pass]
+            di_sp_bugs = di_subprogram_bugs[bugs_file][bugs_pass]
+            di_var_bugs = di_variable_bugs[bugs_file][bugs_pass]
 
             # Omit duplicated bugs.
             di_loc_set = set()
@@ -487,6 +492,7 @@ def Main():
 
                 if bugs_metadata == "DILocation":
                     try:
+                        origin = bug.get("origin")
                         action = bug["action"]
                         bb_name = bug["bb-name"]
                         fn_name = bug["fn-name"]
@@ -494,7 +500,7 @@ def Main():
                     except:
                         skipped_bugs += 1
                         continue
-                    di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
+                    di_loc_bug = DILocBug(origin, action, bb_name, fn_name, instr)
                     if not str(di_loc_bug) in di_loc_set:
                         di_loc_set.add(str(di_loc_bug))
                         if opts.compress:



More information about the llvm-commits mailing list