[llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)
Stephen Tozer via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 2 07:29:58 PDT 2025
https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/143594
>From 51a62f49deca9216c9058971babbb50aef3a1313 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 10 Jun 2025 20:00:51 +0100
Subject: [PATCH 1/2] [DLCov] Origin-Tracking: Core implementation
---
llvm/include/llvm/IR/DebugLoc.h | 49 +++++++++++++++++++++++++-----
llvm/include/llvm/IR/Instruction.h | 2 +-
llvm/lib/CodeGen/BranchFolding.cpp | 7 +++++
llvm/lib/IR/DebugLoc.cpp | 26 +++++++++++++++-
4 files changed, 75 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h
index 999e03b6374a5..6d79aa6b2aa01 100644
--- a/llvm/include/llvm/IR/DebugLoc.h
+++ b/llvm/include/llvm/IR/DebugLoc.h
@@ -27,6 +27,21 @@ namespace llvm {
class Function;
#if LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+ /// Records the stack trace(s) associated with a DebugLoc so that the code
+ /// responsible for an unintentionally-empty location can be displayed.
+ struct DbgLocOrigin {
+   /// Maximum number of frame addresses stored for a single stack trace.
+   static constexpr unsigned long MaxDepth = 16;
+   /// Each entry pairs a frame count with a fixed-size buffer of frame
+   /// addresses; only the first `count` slots of the buffer are read.
+   using StackTracesTy =
+       SmallVector<std::pair<int, std::array<void *, MaxDepth>>, 0>;
+   StackTracesTy StackTraces;
+   /// Captures an initial stack trace when \p ShouldCollectTrace is true;
+   /// otherwise StackTraces starts out empty.
+   DbgLocOrigin(bool ShouldCollectTrace);
+   /// Appends the current stack trace, but only if one is already recorded.
+   void addTrace();
+   /// Read-only access to every trace recorded so far.
+   const StackTracesTy &getOriginStackTraces() const { return StackTraces; }
+ };
+#else
+ struct DbgLocOrigin {
+ DbgLocOrigin(bool) {}
+ };
+#endif
// Used to represent different "kinds" of DebugLoc, expressing that the
// instruction it is part of is either normal and should contain a valid
// DILocation, or otherwise describing the reason why the instruction does
@@ -55,22 +70,29 @@ namespace llvm {
Temporary
};
- // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify
- // to ignore intentionally-empty DebugLocs.
- class DILocAndCoverageTracking : public TrackingMDNodeRef {
+ // Extends TrackingMDNodeRef to also store a DebugLocKind and Origin,
+ // allowing Debugify to ignore intentionally-empty DebugLocs and display the
+ // code responsible for generating unintentionally-empty DebugLocs.
+ // Currently we only need to track the Origin of this DILoc when using a
+ // DebugLoc that is not annotated (i.e. has DebugLocKind::Normal) and has a
+ // null DILocation, so only collect the origin stacktrace in those cases.
+ class DILocAndCoverageTracking : public TrackingMDNodeRef,
+ public DbgLocOrigin {
public:
DebugLocKind Kind;
// Default constructor for empty DebugLocs.
DILocAndCoverageTracking()
- : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {}
- // Valid or nullptr MDNode*, normal DebugLocKind.
+ : TrackingMDNodeRef(nullptr), DbgLocOrigin(true),
+ Kind(DebugLocKind::Normal) {}
+ // Valid or nullptr MDNode*, no annotative DebugLocKind.
DILocAndCoverageTracking(const MDNode *Loc)
- : TrackingMDNodeRef(const_cast<MDNode *>(Loc)),
+ : TrackingMDNodeRef(const_cast<MDNode *>(Loc)), DbgLocOrigin(!Loc),
Kind(DebugLocKind::Normal) {}
LLVM_ABI DILocAndCoverageTracking(const DILocation *Loc);
// Explicit DebugLocKind, which always means a nullptr MDNode*.
DILocAndCoverageTracking(DebugLocKind Kind)
- : TrackingMDNodeRef(nullptr), Kind(Kind) {}
+ : TrackingMDNodeRef(nullptr),
+ DbgLocOrigin(Kind == DebugLocKind::Normal), Kind(Kind) {}
};
template <> struct simplify_type<DILocAndCoverageTracking> {
using SimpleType = MDNode *;
@@ -187,6 +209,19 @@ namespace llvm {
#endif // LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
}
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+ /// Returns the origin stack traces stored alongside the underlying location.
+ const DbgLocOrigin::StackTracesTy &getOriginStackTraces() const {
+ return Loc.getOriginStackTraces();
+ }
+ /// Returns a copy of this DebugLoc after calling addTrace() on the copy, so
+ /// a DebugLoc that already carries an origin trace also records where it was
+ /// copied. This object itself is left unmodified.
+ DebugLoc getCopied() const {
+ DebugLoc NewDL = *this;
+ NewDL.Loc.addTrace();
+ return NewDL;
+ }
+#else
+ /// Without origin-tracking there is nothing extra to record, so this is a
+ /// plain copy.
+ DebugLoc getCopied() const { return *this; }
+#endif
+
/// Get the underlying \a DILocation.
///
/// \pre !*this or \c isa<DILocation>(getAsMDNode()).
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 8e1ef24226789..ef382a9168f24 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -507,7 +507,7 @@ class Instruction : public User,
LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const;
/// Set the debug location information for this instruction.
- void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); }
+ void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc).getCopied(); }
/// Return the debug location for this node as a DebugLoc.
const DebugLoc &getDebugLoc() const { return DbgLoc; }
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index ff9f0ff5d5bc3..3b3e7a418feb5 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -42,6 +42,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -933,7 +934,13 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Sort by hash value so that blocks with identical end sequences sort
// together.
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+ // If origin-tracking is enabled then MergePotentialElt is no longer a POD
+ // type, so we need std::sort instead.
+ std::sort(MergePotentials.begin(), MergePotentials.end());
+#else
array_pod_sort(MergePotentials.begin(), MergePotentials.end());
+#endif
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp
index ffeeeb6f1e4b0..79c5b896f8f25 100644
--- a/llvm/lib/IR/DebugLoc.cpp
+++ b/llvm/lib/IR/DebugLoc.cpp
@@ -9,11 +9,35 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfo.h"
+
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+#include "llvm/Support/Signals.h"
+
+namespace llvm {
+/// Captures the stack trace at the point of construction as the first origin
+/// entry, but only when the caller asks for it; otherwise no trace is stored.
+DbgLocOrigin::DbgLocOrigin(bool ShouldCollectTrace) {
+  if (ShouldCollectTrace) {
+    auto &NewTrace = StackTraces.emplace_back();
+    // The frame count goes in .first, the frame addresses in .second.
+    NewTrace.first = sys::getStackTrace(NewTrace.second);
+  }
+}
+void DbgLocOrigin::addTrace() {
+  // A DebugLoc that has no recorded stacktrace was deliberately left untracked
+  // when it was created, so it stays that way. Extra stacktraces are only
+  // appended to DebugLocs that are already tracked as "missing", to give more
+  // context on how they have propagated through the program.
+  if (!StackTraces.empty()) {
+    auto &NewTrace = StackTraces.emplace_back();
+    NewTrace.first = sys::getStackTrace(NewTrace.second);
+  }
+}
+} // namespace llvm
+#endif
+
using namespace llvm;
#if LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L)
- : TrackingMDNodeRef(const_cast<DILocation *>(L)),
+ : TrackingMDNodeRef(const_cast<DILocation *>(L)), DbgLocOrigin(!L),
Kind(DebugLocKind::Normal) {}
#endif // LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
>From a4ef5c6a7e80ac1366b872ff075efa3fb6038a6b Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Tue, 10 Jun 2025 20:02:36 +0100
Subject: [PATCH 2/2] [DLCov] Origin-Tracking: Add debugify support
---
llvm/include/llvm/Support/Signals.h | 6 +-
llvm/lib/Transforms/Utils/Debugify.cpp | 81 ++++++++++++++++++---
llvm/utils/llvm-original-di-preservation.py | 24 +++---
3 files changed, 88 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h
index 5969a0a6b230b..21b425fffef53 100644
--- a/llvm/include/llvm/Support/Signals.h
+++ b/llvm/include/llvm/Support/Signals.h
@@ -26,10 +26,8 @@
namespace llvm {
// Typedefs that are convenient but only used by the stack-trace-collection code
// added if DebugLoc origin-tracking is enabled.
-using AddressSet = DenseSet<void *, DenseMapInfo<void *, void>>;
-using SymbolizedAddressMap =
- DenseMap<void *, SmallVector<std::string, 0>, DenseMapInfo<void *, void>,
- detail::DenseMapPair<void *, SmallVector<std::string, 0>>>;
+using AddressSet = DenseSet<void *>;
+using SymbolizedAddressMap = DenseMap<void *, SmallVector<std::string, 0>>;
} // namespace llvm
#endif
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 5f70bc442d2f0..0b57c4a2f9dae 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/Utils/Debugify.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/InstIterator.h"
@@ -28,6 +29,11 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include <optional>
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+// We need the Signals header to operate on stacktraces if we're using DebugLoc
+// origin-tracking.
+#include "llvm/Support/Signals.h"
+#endif
#define DEBUG_TYPE "debugify"
@@ -59,6 +65,52 @@ cl::opt<Level> DebugifyLevel(
raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+// These maps refer to addresses in this instance of LLVM, so we can reuse them
+// everywhere - therefore, we store them at file scope.
+static SymbolizedAddressMap SymbolizedAddrs;
+static AddressSet UnsymbolizedAddrs;
+
+/// Renders every origin stack trace attached to \p I's DebugLoc as text, one
+/// symbolized frame per line, with consecutive traces separated by a line of
+/// '=' characters.
+///
+/// Addresses still pending in UnsymbolizedAddrs are symbolized first; every
+/// address in the traces is expected to have been registered beforehand via
+/// collectStackAddresses.
+std::string symbolizeStackTrace(const Instruction *I) {
+  // We flush the set of unsymbolized addresses at the latest possible moment,
+  // i.e. now.
+  if (!UnsymbolizedAddrs.empty()) {
+    sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs);
+    UnsymbolizedAddrs.clear();
+  }
+  // Bind by reference: the traces are owned by the instruction's DebugLoc, so
+  // there is no need to copy the whole vector for every reported bug.
+  const auto &OriginStackTraces = I->getDebugLoc().getOriginStackTraces();
+  std::string Result;
+  raw_string_ostream OS(Result);
+  for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) {
+    if (TraceIdx != 0)
+      OS << "========================================\n";
+    const auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx];
+    // Each address maps to a list of symbolized strings, and every string is
+    // printed as its own numbered frame, so the counter is independent of the
+    // address index.
+    unsigned VirtualFrameNo = 0;
+    for (int Frame = 0; Frame < Depth; ++Frame) {
+      assert(SymbolizedAddrs.contains(StackTrace[Frame]) &&
+             "Expected each address to have been symbolized.");
+      for (const std::string &SymbolizedFrame :
+           SymbolizedAddrs[StackTrace[Frame]]) {
+        OS << right_justify(formatv("#{0}", VirtualFrameNo++).str(),
+                            std::log10(Depth) + 2)
+           << ' ' << SymbolizedFrame << '\n';
+      }
+    }
+  }
+  return Result;
+}
+/// Queues every frame address from the origin stack traces of \p I's DebugLoc
+/// for symbolization, skipping addresses that are already in SymbolizedAddrs.
+void collectStackAddresses(Instruction &I) {
+  for (const auto &Trace : I.getDebugLoc().getOriginStackTraces()) {
+    const int NumFrames = Trace.first;
+    const auto &Frames = Trace.second;
+    for (int Idx = 0; Idx < NumFrames; ++Idx) {
+      if (SymbolizedAddrs.contains(Frames[Idx]))
+        continue;
+      UnsymbolizedAddrs.insert(Frames[Idx]);
+    }
+  }
+}
+#else
+// No-op stand-in used when DebugLoc origin-tracking is compiled out, so that
+// callers do not need their own preprocessor guards.
+void collectStackAddresses(Instruction &I) {}
+#endif // LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+
uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0;
}
@@ -375,6 +427,8 @@ bool llvm::collectDebugInfoMetadata(Module &M,
LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
DebugInfoBeforePass.InstToDelete.insert({&I, &I});
+ // Track the addresses to symbolize, if the feature is enabled.
+ collectStackAddresses(I);
DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)});
}
}
@@ -450,14 +504,23 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
auto BBName = BB->hasName() ? BB->getName() : "no-name";
auto InstName = Instruction::getOpcodeName(Instr->getOpcode());
+ auto CreateJSONBugEntry = [&](const char *Action) {
+ Bugs.push_back(llvm::json::Object({
+ {"metadata", "DILocation"},
+ {"fn-name", FnName.str()},
+ {"bb-name", BBName.str()},
+ {"instr", InstName},
+ {"action", Action},
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+ {"origin", symbolizeStackTrace(Instr)},
+#endif
+ }));
+ };
+
auto InstrIt = DILocsBefore.find(Instr);
if (InstrIt == DILocsBefore.end()) {
if (ShouldWriteIntoJSON)
- Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
- {"fn-name", FnName.str()},
- {"bb-name", BBName.str()},
- {"instr", InstName},
- {"action", "not-generate"}}));
+ CreateJSONBugEntry("not-generate");
else
dbg() << "WARNING: " << NameOfWrappedPass
<< " did not generate DILocation for " << *Instr
@@ -470,11 +533,7 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
// If the instr had the !dbg attached before the pass, consider it as
// a debug info issue.
if (ShouldWriteIntoJSON)
- Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
- {"fn-name", FnName.str()},
- {"bb-name", BBName.str()},
- {"instr", InstName},
- {"action", "drop"}}));
+ CreateJSONBugEntry("drop");
else
dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of "
<< *Instr << " (BB: " << BBName << ", Fn: " << FnName
@@ -612,6 +671,8 @@ bool llvm::checkDebugInfoMetadata(Module &M,
LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
+ // Track the addresses to symbolize, if the feature is enabled.
+ collectStackAddresses(I);
DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)});
}
}
diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py
index dc1fa518ca8e6..7a8752da00667 100755
--- a/llvm/utils/llvm-original-di-preservation.py
+++ b/llvm/utils/llvm-original-di-preservation.py
@@ -13,14 +13,15 @@
class DILocBug:
- def __init__(self, action, bb_name, fn_name, instr):
+ # `origin` is the symbolized origin stack trace, or None when the JSON entry
+ # has no "origin" key (it is read with bug.get("origin")).
+ def __init__(self, origin, action, bb_name, fn_name, instr):
+ self.origin = origin
self.action = action
self.bb_name = bb_name
self.fn_name = fn_name
self.instr = instr
def __str__(self):
- return self.action + self.bb_name + self.fn_name + self.instr
+ # Treat a missing origin as empty so the deduplication key can still be
+ # built; concatenating None to a str would raise TypeError.
+ return self.action + self.bb_name + self.fn_name + self.instr + (self.origin or "")
class DISPBug:
@@ -86,6 +87,7 @@ def generate_html_report(
"Function Name",
"Basic Block Name",
"Action",
+ "Origin",
]
for column in header_di_loc:
@@ -112,6 +114,9 @@ def generate_html_report(
row.append(x.fn_name)
row.append(x.bb_name)
row.append(x.action)
+ row.append(
+ f"<details><summary>View Origin StackTrace</summary><pre>{x.origin}</pre></details>"
+ )
row.append(" </tr>\n")
# Dump the bugs info into the table.
for column in row:
@@ -428,9 +433,9 @@ def Main():
sys.exit(1)
# Use the defaultdict in order to make multidim dicts.
- di_location_bugs = defaultdict(lambda: defaultdict(dict))
- di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
- di_variable_bugs = defaultdict(lambda: defaultdict(dict))
+ di_location_bugs = defaultdict(lambda: defaultdict(list))
+ di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
+ di_variable_bugs = defaultdict(lambda: defaultdict(list))
# Use the ordered dict to make a summary.
di_location_bugs_summary = OrderedDict()
@@ -470,9 +475,9 @@ def Main():
skipped_lines += 1
continue
- di_loc_bugs = []
- di_sp_bugs = []
- di_var_bugs = []
+ di_loc_bugs = di_location_bugs[bugs_file][bugs_pass]
+ di_sp_bugs = di_subprogram_bugs[bugs_file][bugs_pass]
+ di_var_bugs = di_variable_bugs[bugs_file][bugs_pass]
# Omit duplicated bugs.
di_loc_set = set()
@@ -487,6 +492,7 @@ def Main():
if bugs_metadata == "DILocation":
try:
+ origin = bug.get("origin")
action = bug["action"]
bb_name = bug["bb-name"]
fn_name = bug["fn-name"]
@@ -494,7 +500,7 @@ def Main():
except:
skipped_bugs += 1
continue
- di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
+ di_loc_bug = DILocBug(origin, action, bb_name, fn_name, instr)
if not str(di_loc_bug) in di_loc_set:
di_loc_set.add(str(di_loc_bug))
if opts.compress:
More information about the llvm-commits
mailing list