[clang] [llvm] [DLCov 3/5] Implement DebugLoc origin-tracking (PR #107369)

Stephen Tozer via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 5 03:04:10 PDT 2024


https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/107369

>From e45d7e68a371a09ea766c4accf8edc6c030fd7fd Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 4 Sep 2024 12:09:50 +0100
Subject: [PATCH 1/3] Add CMake option to enable expensive line number origin
 tracking

---
 llvm/CMakeLists.txt                        |  4 ++++
 llvm/cmake/modules/HandleLLVMOptions.cmake | 12 ++++++++++++
 llvm/docs/CMake.rst                        | 11 +++++++++++
 llvm/include/llvm/Config/config.h.cmake    |  4 ++++
 4 files changed, 31 insertions(+)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 12618966c4adfd..3e2e90f5adad2e 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -524,6 +524,10 @@ endif()
 
 option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." OFF)
 
+set(LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING "DISABLED" CACHE STRING
+  "Enhance debugify's line number coverage tracking; enabling this is abi-breaking. Can be DISABLED, COVERAGE, or COVERAGE_AND_ORIGIN.")
+set_property(CACHE LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING PROPERTY STRINGS DISABLED COVERAGE COVERAGE_AND_ORIGIN)
+
 set(WINDOWS_PREFER_FORWARD_SLASH_DEFAULT OFF)
 if (MINGW)
   # Cygwin doesn't identify itself as Windows, and thus gets path::Style::posix
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 5ca580fbb59c59..a4b11c149da9de 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -196,6 +196,18 @@ else()
   message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!")
 endif()
 
+string(TOUPPER "${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}" uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING)
+
+if( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE" )
+  set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 )
+elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE_AND_ORIGIN" )
+  message(FATAL_ERROR "\"COVERAGE_AND_ORIGIN\" setting for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING currently unimplemented.")
+elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "DISABLED" OR NOT DEFINED LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING )
+  # The DISABLED setting is default and requires no additional defines.
+else()
+  message(FATAL_ERROR "Unknown value for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING: \"${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}\"!")
+endif()
+
 if( LLVM_REVERSE_ITERATION )
   set( LLVM_ENABLE_REVERSE_ITERATION 1 )
 endif()
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index 2a80813999ea1e..304e22759770d9 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -475,6 +475,17 @@ enabled sub-projects. Nearly all of these variable names begin with
 **LLVM_ENABLE_BINDINGS**:BOOL
   If disabled, do not try to build the OCaml bindings.
 
+**LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING**:STRING
+  Enhances Debugify's ability to detect line number errors by storing extra
+  information inside Instructions, removing false positives from Debugify's
+  results at the cost of performance. Allowed values are ``DISABLED``
+  (default), ``COVERAGE``, and ``COVERAGE_AND_ORIGIN``. ``COVERAGE`` tracks
+  whether and why a line number was intentionally dropped or not generated for
+  an instruction, allowing Debugify to avoid reporting these as errors.
+  ``COVERAGE_AND_ORIGIN`` additionally stores a stacktrace of the point where
+  each DebugLoc is unintentionally dropped, allowing for much easier bug
+  triaging at the cost of a ~10x performance slowdown.
+
 **LLVM_ENABLE_DIA_SDK**:BOOL
   Enable building with MSVC DIA SDK for PDB debugging support. Available
   only with MSVC. Defaults to ON.
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index ff30741c8f360a..388ce1e8f74e3e 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -19,6 +19,10 @@
 /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */
 #cmakedefine01 LLVM_ENABLE_CRASH_DUMPS
 
+/* Define to 1 to enable expensive tracking of debug location coverage,
+   and to 0 otherwise. */
+#cmakedefine01 ENABLE_DEBUGLOC_COVERAGE_TRACKING
+
 /* Define to 1 to prefer forward slashes on Windows, and to 0 prefer
    backslashes. */
 #cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH

>From abab69ae42bf5650d6a8fff5a22341ff32effe57 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 4 Sep 2024 12:23:52 +0100
Subject: [PATCH 2/3] Add conditionally-enabled DebugLocKinds

---
 clang/lib/CodeGen/BackendUtil.cpp          | 16 +++++
 llvm/include/llvm/IR/DebugLoc.h            | 74 +++++++++++++++++++++-
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp |  5 ++
 llvm/lib/IR/DebugInfo.cpp                  |  4 +-
 llvm/lib/IR/DebugLoc.cpp                   | 16 +++++
 llvm/lib/Transforms/Utils/Debugify.cpp     | 19 ++++--
 6 files changed, 124 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index e765bbf637a661..20653daff7d4ae 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -911,6 +911,22 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
       Debugify.setOrigDIVerifyBugsReportFilePath(
           CodeGenOpts.DIBugsReportFilePath);
     Debugify.registerCallbacks(PIC, MAM);
+
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+    // If we're using debug location coverage tracking, mark all the
+    // instructions coming out of the frontend without a DebugLoc as being
+    // intentional line-zero locations, to prevent both those instructions and
+    // new instructions that inherit their location from being treated as
+    // incorrectly empty locations.
+    for (Function &F : *TheModule) {
+      if (!F.getSubprogram())
+        continue;
+      for (BasicBlock &BB : F)
+        for (Instruction &I : BB)
+          if (!I.getDebugLoc())
+            I.setDebugLoc(DebugLoc::getLineZero());
+    }
+#endif
   }
   // Attempt to load pass plugins and register their callbacks with PB.
   for (auto &PluginFN : CodeGenOpts.PassPlugins) {
diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h
index c22d3e9b10d27f..ae5f9d72c97e26 100644
--- a/llvm/include/llvm/IR/DebugLoc.h
+++ b/llvm/include/llvm/IR/DebugLoc.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_IR_DEBUGLOC_H
 #define LLVM_IR_DEBUGLOC_H
 
+#include "llvm/Config/config.h"
 #include "llvm/IR/TrackingMDRef.h"
 #include "llvm/Support/DataTypes.h"
 
@@ -22,6 +23,67 @@ namespace llvm {
   class LLVMContext;
   class raw_ostream;
   class DILocation;
+  class Function;
+
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+  // Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc
+  // is either ordinary, containing a valid DILocation, or otherwise describing
+  // the reason why the DebugLoc does not contain a valid DILocation.
+  enum class DebugLocKind : uint8_t {
+    // DebugLoc is expected to contain a valid DILocation.
+    Normal,
+    // DebugLoc intentionally does not have a valid DILocation; may be for a
+    // compiler-generated instruction, or an explicitly dropped location.
+    LineZero,
+    // DebugLoc does not have a known or currently knowable source location,
+    // e.g. the attribution is ambiguous in a way that can't be represented, or
+    // determining the correct location is complicated and requires future
+    // developer effort.
+    Unknown,
+    // DebugLoc is attached to an instruction that we don't expect to be
+    // emitted, and so can omit a valid DILocation; we don't expect to ever try
+    // and emit these into the line table, and trying to do so is a sign that
+    // something has gone wrong (most likely a DebugLoc leaking from a transient
+    // compiler-generated instruction).
+    Temporary
+  };
+
+  // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify
+  // to ignore intentionally-empty DebugLocs.
+  class DILocAndCoverageTracking : public TrackingMDNodeRef {
+  public:
+    DebugLocKind Kind;
+    // Default constructor for empty DebugLocs.
+    DILocAndCoverageTracking()
+        : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {}
+    // Valid or nullptr MDNode*, normal DebugLocKind.
+    DILocAndCoverageTracking(const MDNode *Loc)
+        : TrackingMDNodeRef(const_cast<MDNode *>(Loc)),
+          Kind(DebugLocKind::Normal) {}
+    DILocAndCoverageTracking(const DILocation *Loc);
+    // Explicit DebugLocKind, which always means a nullptr MDNode*.
+    DILocAndCoverageTracking(DebugLocKind Kind)
+        : TrackingMDNodeRef(nullptr), Kind(Kind) {}
+  };
+  template <> struct simplify_type<DILocAndCoverageTracking> {
+    using SimpleType = MDNode *;
+
+    static MDNode *getSimplifiedValue(DILocAndCoverageTracking &MD) {
+      return MD.get();
+    }
+  };
+  template <> struct simplify_type<const DILocAndCoverageTracking> {
+    using SimpleType = MDNode *;
+
+    static MDNode *getSimplifiedValue(const DILocAndCoverageTracking &MD) {
+      return MD.get();
+    }
+  };
+
+  using DebugLocTrackingRef = DILocAndCoverageTracking;
+#else
+  using DebugLocTrackingRef = TrackingMDNodeRef;
+#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING
 
   /// A debug info location.
   ///
@@ -31,7 +93,8 @@ namespace llvm {
   /// To avoid extra includes, \a DebugLoc doubles the \a DILocation API with a
   /// one based on relatively opaque \a MDNode pointers.
   class DebugLoc {
-    TrackingMDNodeRef Loc;
+
+    DebugLocTrackingRef Loc;
 
   public:
     DebugLoc() = default;
@@ -47,6 +110,15 @@ namespace llvm {
     /// IR.
     explicit DebugLoc(const MDNode *N);
 
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+    DebugLoc(DebugLocKind Kind) : Loc(Kind) {}
+    DebugLocKind getKind() const { return Loc.Kind; }
+#endif
+
+    static DebugLoc getTemporary();
+    static DebugLoc getUnknown();
+    static DebugLoc getLineZero();
+
     /// Get the underlying \a DILocation.
     ///
     /// \pre !*this or \c isa<DILocation>(getAsMDNode()).
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index f88653146cc6ff..4ba8262259b112 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -31,6 +31,7 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/config.h"
 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
 #include "llvm/IR/Constants.h"
@@ -2080,6 +2081,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   }
 
   if (!DL) {
+    // FIXME: We could assert that `DL.getKind() != DebugLocKind::Temporary`
+    // here, or otherwise record any temporary DebugLocs seen to ensure that
+    // transient compiler-generated instructions aren't leaking their DLs to
+    // other instructions.
     // We have an unspecified location, which might want to be line 0.
     // If we have already emitted a line-0 record, don't repeat it.
     if (LastAsmLine == 0)
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 7fa1f9696d43b2..86ac46540c5ef9 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -979,7 +979,7 @@ void Instruction::dropLocation() {
   }
 
   if (!MayLowerToCall) {
-    setDebugLoc(DebugLoc());
+    setDebugLoc(DebugLoc::getLineZero());
     return;
   }
 
@@ -998,7 +998,7 @@ void Instruction::dropLocation() {
     //
     // One alternative is to set a line 0 location with the existing scope and
     // inlinedAt info. The location might be sensitive to when inlining occurs.
-    setDebugLoc(DebugLoc());
+    setDebugLoc(DebugLoc::getLineZero());
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp
index bdea52180f74ae..501eafd0175b7b 100644
--- a/llvm/lib/IR/DebugLoc.cpp
+++ b/llvm/lib/IR/DebugLoc.cpp
@@ -11,6 +11,22 @@
 #include "llvm/IR/DebugInfo.h"
 using namespace llvm;
 
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L)
+    : TrackingMDNodeRef(const_cast<DILocation *>(L)),
+      Kind(DebugLocKind::Normal) {}
+
+DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); }
+DebugLoc DebugLoc::getUnknown() { return DebugLoc(DebugLocKind::Unknown); }
+DebugLoc DebugLoc::getLineZero() { return DebugLoc(DebugLocKind::LineZero); }
+
+#else
+
+DebugLoc DebugLoc::getTemporary() { return DebugLoc(); }
+DebugLoc DebugLoc::getUnknown() { return DebugLoc(); }
+DebugLoc DebugLoc::getLineZero() { return DebugLoc(); }
+#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING
+
 //===----------------------------------------------------------------------===//
 // DebugLoc Implementation
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index fcc82eadac36cf..f9f85d05ab45c5 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -292,6 +292,16 @@ bool llvm::stripDebugifyMetadata(Module &M) {
   return Changed;
 }
 
+// True if I has a DILocation or, with coverage tracking, a non-Normal kind.
+static bool hasLoc(const Instruction &I) {
+  const DebugLoc &DL = I.getDebugLoc();
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+  return DL.get() || DL.getKind() != DebugLocKind::Normal;
+#else
+  return DL.get() != nullptr;
+#endif
+}
+
 bool llvm::collectDebugInfoMetadata(Module &M,
                                     iterator_range<Module::iterator> Functions,
                                     DebugInfoPerPass &DebugInfoBeforePass,
@@ -364,9 +374,7 @@ bool llvm::collectDebugInfoMetadata(Module &M,
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
         DebugInfoBeforePass.InstToDelete.insert({&I, &I});
 
-        const DILocation *Loc = I.getDebugLoc().get();
-        bool HasLoc = Loc != nullptr;
-        DebugInfoBeforePass.DILocations.insert({&I, HasLoc});
+        DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)});
       }
     }
   }
@@ -609,10 +617,7 @@ bool llvm::checkDebugInfoMetadata(Module &M,
 
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
 
-        const DILocation *Loc = I.getDebugLoc().get();
-        bool HasLoc = Loc != nullptr;
-
-        DebugInfoAfterPass.DILocations.insert({&I, HasLoc});
+        DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)});
       }
     }
   }

>From 0d750fdbba783f000947c6ba1e09afe908044ecd Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 4 Sep 2024 16:35:46 +0100
Subject: [PATCH 3/3] Add origin-tracking support for Unix in LLVM

---
 llvm/cmake/modules/HandleLLVMOptions.cmake  |   3 +-
 llvm/include/llvm/Config/config.h.cmake     |   4 +
 llvm/include/llvm/IR/DebugLoc.h             |  56 ++++++++--
 llvm/include/llvm/Support/Signals.h         |  40 +++++++
 llvm/lib/CodeGen/BranchFolding.cpp          |   2 +-
 llvm/lib/CodeGen/BranchFolding.h            |  12 +-
 llvm/lib/IR/DebugLoc.cpp                    |  22 +++-
 llvm/lib/IR/Instruction.cpp                 |   6 +-
 llvm/lib/Support/Signals.cpp                | 116 ++++++++++++++++++++
 llvm/lib/Support/Unix/Signals.inc           |  15 +++
 llvm/lib/Support/Windows/Signals.inc        |   5 +
 llvm/lib/Transforms/Utils/Debugify.cpp      |  77 +++++++++++--
 llvm/utils/llvm-original-di-preservation.py |  22 ++--
 13 files changed, 344 insertions(+), 36 deletions(-)

diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index a4b11c149da9de..7f66e55dca13b1 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -201,7 +201,8 @@ string(TOUPPER "${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}" uppercase_LLVM_ENABLE
 if( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE" )
   set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 )
 elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE_AND_ORIGIN" )
-  message(FATAL_ERROR "\"COVERAGE_AND_ORIGIN\" setting for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING currently unimplemented.")
+  set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 )
+  set( ENABLE_DEBUGLOC_ORIGIN_TRACKING 1 )
 elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "DISABLED" OR NOT DEFINED LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING )
   # The DISABLED setting is default and requires no additional defines.
 else()
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index 388ce1e8f74e3e..7e8f1aa9474654 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -23,6 +23,10 @@
    and to 0 otherwise. */
 #cmakedefine01 ENABLE_DEBUGLOC_COVERAGE_TRACKING
 
+/* Define to 1 to enable expensive tracking of the origin of debug location
+   coverage bugs, and to 0 otherwise. */
+#cmakedefine01 ENABLE_DEBUGLOC_ORIGIN_TRACKING
+
 /* Define to 1 to prefer forward slashes on Windows, and to 0 prefer
    backslashes. */
 #cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH
diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h
index ae5f9d72c97e26..a3e26eadb224d4 100644
--- a/llvm/include/llvm/IR/DebugLoc.h
+++ b/llvm/include/llvm/IR/DebugLoc.h
@@ -26,6 +26,22 @@ namespace llvm {
   class Function;
 
 #if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+  struct DbgLocOrigin {
+    static constexpr unsigned long MaxDepth = 16; // Frame limit per trace.
+    using StackTracesTy =
+        SmallVector<std::pair<int, std::array<void *, MaxDepth>>, 0>;
+    StackTracesTy StackTraces; // One (depth, frames) entry per captured trace.
+    DbgLocOrigin(bool ShouldCollectTrace); // Captures a trace only when true.
+    void addTrace(); // No-op unless a trace was collected at construction.
+    const StackTracesTy &getOriginStackTraces() const { return StackTraces; }
+  };
+#else
+  struct DbgLocOrigin {
+    DbgLocOrigin(bool) {}
+  };
+#endif
+
   // Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc
   // is either ordinary, containing a valid DILocation, or otherwise describing
   // the reason why the DebugLoc does not contain a valid DILocation.
@@ -48,22 +64,29 @@ namespace llvm {
     Temporary
   };
 
-  // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify
-  // to ignore intentionally-empty DebugLocs.
-  class DILocAndCoverageTracking : public TrackingMDNodeRef {
+  // Extends TrackingMDNodeRef to also store a DebugLocKind and Origin,
+  // allowing Debugify to ignore intentionally-empty DebugLocs and display the
+  // code responsible for generating unintentionally-empty DebugLocs.
+  // Currently we only need to track the Origin of this DILoc when using a
+  // DebugLoc that is Normal and empty, so only collect the origin stacktrace in
+  // those cases.
+  class DILocAndCoverageTracking : public TrackingMDNodeRef,
+                                   public DbgLocOrigin {
   public:
     DebugLocKind Kind;
     // Default constructor for empty DebugLocs.
     DILocAndCoverageTracking()
-        : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {}
-    // Valid or nullptr MDNode*, normal DebugLocKind.
+        : TrackingMDNodeRef(nullptr), DbgLocOrigin(true),
+          Kind(DebugLocKind::Normal) {}
+    // Valid or nullptr MDNode*, normal DebugLocKind
     DILocAndCoverageTracking(const MDNode *Loc)
-        : TrackingMDNodeRef(const_cast<MDNode *>(Loc)),
+        : TrackingMDNodeRef(const_cast<MDNode *>(Loc)), DbgLocOrigin(!Loc),
           Kind(DebugLocKind::Normal) {}
     DILocAndCoverageTracking(const DILocation *Loc);
-    // Explicit DebugLocKind, which always means a nullptr MDNode*.
+    // Always nullptr MDNode*, any DebugLocKind
     DILocAndCoverageTracking(DebugLocKind Kind)
-        : TrackingMDNodeRef(nullptr), Kind(Kind) {}
+        : TrackingMDNodeRef(nullptr),
+          DbgLocOrigin(Kind == DebugLocKind::Normal), Kind(Kind) {}
   };
   template <> struct simplify_type<DILocAndCoverageTracking> {
     using SimpleType = MDNode *;
@@ -115,6 +138,23 @@ namespace llvm {
     DebugLocKind getKind() const { return Loc.Kind; }
 #endif
 
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+#if !ENABLE_DEBUGLOC_COVERAGE_TRACKING
+#error Cannot enable DebugLoc origin-tracking without coverage-tracking!
+#endif
+
+    const DbgLocOrigin::StackTracesTy &getOriginStackTraces() const {
+      return Loc.getOriginStackTraces();
+    }
+    DebugLoc getCopied() const {
+      DebugLoc NewDL = *this;
+      NewDL.Loc.addTrace();
+      return NewDL;
+    }
+#else
+    DebugLoc getCopied() const { return *this; }
+#endif
+
     static DebugLoc getTemporary();
     static DebugLoc getUnknown();
     static DebugLoc getLineZero();
diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h
index 70749ce30184a7..6addb8212e20ac 100644
--- a/llvm/include/llvm/Support/Signals.h
+++ b/llvm/include/llvm/Support/Signals.h
@@ -14,6 +14,8 @@
 #ifndef LLVM_SUPPORT_SIGNALS_H
 #define LLVM_SUPPORT_SIGNALS_H
 
+#include "llvm/Config/config.h"
+#include <array>
 #include <cstdint>
 #include <string>
 
@@ -21,6 +23,22 @@ namespace llvm {
 class StringRef;
 class raw_ostream;
 
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+// Typedefs that are convenient but only used by the StackTrace-collection code
+// added if DebugLoc origin-tracking is enabled.
+template <typename T, typename Enable> struct DenseMapInfo;
+template <typename ValueT, typename ValueInfoT> class DenseSet;
+namespace detail {
+template <typename KeyT, typename ValueT> struct DenseMapPair;
+}
+template <typename KeyT, typename ValueT, typename KeyInfoT, typename BucketT>
+class DenseMap;
+using AddressSet = DenseSet<void *, DenseMapInfo<void *, void>>;
+using SymbolizedAddressMap =
+    DenseMap<void *, std::string, DenseMapInfo<void *, void>,
+             detail::DenseMapPair<void *, std::string>>;
+#endif
+
 namespace sys {
 
   /// This function runs all the registered interrupt handlers, including the
@@ -55,6 +73,28 @@ namespace sys {
   ///        specified, the entire frame is printed.
   void PrintStackTrace(raw_ostream &OS, int Depth = 0);
 
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+#ifdef NDEBUG
+#error DebugLoc origin-tracking should not be enabled in Release builds.
+#endif
+  /// Populates the given array with a stacktrace of the current program, up to
+  /// MaxDepth frames. Returns the number of frames captured, which will be
+  /// inserted into \p StackTrace from index 0. All entries after the returned
+  /// depth will be unmodified. NB: This is only intended to be used for
+  /// introspection of LLVM by Debugify, will not be enabled in release builds,
+  /// and should not be relied on for other purposes.
+  template <unsigned long MaxDepth>
+  int getStackTrace(std::array<void *, MaxDepth> &StackTrace);
+
+  /// Takes a set of \p Addresses, symbolizes them and stores the result in the
+  /// provided \p SymbolizedAddresses map.
+  /// NB: This is only intended to be used for introspection of LLVM by
+  /// Debugify, will not be enabled in release builds, and should not be relied
+  /// on for other purposes.
+  void symbolizeAddresses(AddressSet &Addresses,
+                          SymbolizedAddressMap &SymbolizedAddresses);
+#endif
+
   // Run all registered signal handlers.
   void RunSignalHandlers();
 
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 92a03eb52e35d9..edd60c5ad4a18d 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -915,7 +915,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
   // Walk through equivalence sets looking for actual exact matches.
   while (MergePotentials.size() > 1) {
     unsigned CurHash = MergePotentials.back().getHash();
-    const DebugLoc &BranchDL = MergePotentials.back().getBranchDebugLoc();
+    const DebugLoc BranchDL = MergePotentials.back().getBranchDebugLoc();
 
     // Build SameTails, identifying the set of blocks with this hash code
     // and with the maximum number of instructions in common.
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h
index ff2bbe06c04887..9638cfda1239d1 100644
--- a/llvm/lib/CodeGen/BranchFolding.h
+++ b/llvm/lib/CodeGen/BranchFolding.h
@@ -50,11 +50,15 @@ class TargetRegisterInfo;
     class MergePotentialsElt {
       unsigned Hash;
       MachineBasicBlock *Block;
-      DebugLoc BranchDebugLoc;
+      // We use MDNode rather than DebugLoc here because under certain CMake
+      // options*, DebugLoc may contain a SmallVector used for introspection
+      // purposes, which causes errors when stored here.
+      // *LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING=COVERAGE_AND_ORIGIN
+      MDNode *BranchDebugLoc;
 
     public:
-      MergePotentialsElt(unsigned h, MachineBasicBlock *b, DebugLoc bdl)
-          : Hash(h), Block(b), BranchDebugLoc(std::move(bdl)) {}
+      MergePotentialsElt(unsigned h, MachineBasicBlock *b, MDNode *bdl)
+          : Hash(h), Block(b), BranchDebugLoc(bdl) {}
 
       unsigned getHash() const { return Hash; }
       MachineBasicBlock *getBlock() const { return Block; }
@@ -63,7 +67,7 @@ class TargetRegisterInfo;
         Block = MBB;
       }
 
-      const DebugLoc &getBranchDebugLoc() { return BranchDebugLoc; }
+      const DebugLoc getBranchDebugLoc() { return DebugLoc(BranchDebugLoc); }
 
       bool operator<(const MergePotentialsElt &) const;
     };
diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp
index 501eafd0175b7b..ad02cd078df043 100644
--- a/llvm/lib/IR/DebugLoc.cpp
+++ b/llvm/lib/IR/DebugLoc.cpp
@@ -9,11 +9,31 @@
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/DebugInfo.h"
+
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+#include "llvm/Support/Signals.h"
+
+namespace llvm {
+DbgLocOrigin::DbgLocOrigin(bool ShouldCollectTrace) {
+  if (!ShouldCollectTrace)
+    return; // No origin requested: StackTraces stays empty.
+  auto &[NumFrames, Frames] = StackTraces.emplace_back();
+  NumFrames = sys::getStackTrace(Frames); // Record the construction site.
+}
+void DbgLocOrigin::addTrace() {
+  if (!StackTraces.empty()) { // Origins that never collected stay empty.
+    auto &[NumFrames, Frames] = StackTraces.emplace_back();
+    NumFrames = sys::getStackTrace(Frames);
+  }
+}
+} // namespace llvm
+#endif
+
 using namespace llvm;
 
 #if ENABLE_DEBUGLOC_COVERAGE_TRACKING
 DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L)
-    : TrackingMDNodeRef(const_cast<DILocation *>(L)),
+    : TrackingMDNodeRef(const_cast<DILocation *>(L)), DbgLocOrigin(!L),
       Kind(DebugLocKind::Normal) {}
 
 DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); }
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 6f0f3f244c050c..2c0713aa886412 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -1279,6 +1279,9 @@ void Instruction::swapProfMetadata() {
 
 void Instruction::copyMetadata(const Instruction &SrcInst,
                                ArrayRef<unsigned> WL) {
+  if (WL.empty() || is_contained(WL, LLVMContext::MD_dbg))
+    setDebugLoc(SrcInst.getDebugLoc());
+
   if (!SrcInst.hasMetadata())
     return;
 
@@ -1292,8 +1295,6 @@ void Instruction::copyMetadata(const Instruction &SrcInst,
     if (WL.empty() || WLS.count(MD.first))
       setMetadata(MD.first, MD.second);
   }
-  if (WL.empty() || WLS.count(LLVMContext::MD_dbg))
-    setDebugLoc(SrcInst.getDebugLoc());
 }
 
 Instruction *Instruction::clone() const {
@@ -1311,5 +1312,6 @@ Instruction *Instruction::clone() const {
 
   New->SubclassOptionalData = SubclassOptionalData;
   New->copyMetadata(*this);
+  New->setDebugLoc(getDebugLoc().getCopied());
   return New;
 }
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index 9f9030e79d1040..6825720f51e96d 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -253,6 +253,122 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
   return true;
 }
 
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+void sys::symbolizeAddresses(AddressSet &Addresses,
+                             SymbolizedAddressMap &SymbolizedAddresses) {
+  assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) &&
+         "Debugify origin stacktraces require symbolization to be enabled.");
+
+  // Convert Set of Addresses to ordered list.
+  SmallVector<void *, 0> AddressList(Addresses.begin(), Addresses.end());
+  if (AddressList.empty())
+    return;
+  int NumAddresses = AddressList.size();
+  llvm::sort(AddressList);
+
+  // Use llvm-symbolizer tool to symbolize the stack traces. First look for it
+  // via the LLVMSymbolizerPathEnv environment variable, then in $PATH.
+  ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
+  if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
+    LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
+  }
+  if (!LLVMSymbolizerPathOrErr)
+    LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
+  assert(!!LLVMSymbolizerPathOrErr &&
+         "Debugify origin stacktraces require llvm-symbolizer.");
+  const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
+
+  // Try to guess the main executable name, since we don't have argv0 available
+  // here.
+  std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);
+
+  BumpPtrAllocator Allocator;
+  StringSaver StrPool(Allocator);
+  std::vector<const char *> Modules(NumAddresses, nullptr);
+  std::vector<intptr_t> Offsets(NumAddresses, 0);
+  if (!findModulesAndOffsets(AddressList.data(), NumAddresses, Modules.data(),
+                             Offsets.data(), MainExecutableName.c_str(),
+                             StrPool))
+    return;
+  int InputFD;
+  SmallString<32> InputFile, OutputFile;
+  sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
+  sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile);
+  FileRemover InputRemover(InputFile.c_str());
+  FileRemover OutputRemover(OutputFile.c_str());
+
+  {
+    raw_fd_ostream Input(InputFD, true);
+    for (int i = 0; i < NumAddresses; i++) {
+      if (Modules[i])
+        Input << Modules[i] << " " << (void *)Offsets[i] << "\n";
+    }
+  }
+
+  std::optional<StringRef> Redirects[] = {InputFile.str(), OutputFile.str(),
+                                          StringRef("")};
+  StringRef Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining",
+#ifdef _WIN32
+                      // Pass --relative-address on Windows so that we don't
+                      // have to add ImageBase from PE file.
+                      // FIXME: Make this the default for llvm-symbolizer.
+                      "--relative-address",
+#endif
+                      "--demangle"};
+  int RunResult =
+      sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects);
+  if (RunResult != 0)
+    return;
+
+  // This report format is based on the sanitizer stack trace printer.  See
+  // sanitizer_stacktrace_printer.cc in compiler-rt.
+  auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
+  if (!OutputBuf)
+    return;
+  StringRef Output = OutputBuf.get()->getBuffer();
+  SmallVector<StringRef, 32> Lines;
+  Output.split(Lines, "\n");
+  auto CurLine = Lines.begin();
+  for (int i = 0; i < NumAddresses; i++) {
+    assert(!SymbolizedAddresses.contains(AddressList[i]));
+    std::string &SymbolizedAddr = SymbolizedAddresses[AddressList[i]];
+    raw_string_ostream OS(SymbolizedAddr);
+    if (!Modules[i]) {
+      OS << format_ptr(AddressList[i]) << '\n';
+      continue;
+    }
+    // Read pairs of lines (function name and file/line info) until we
+    // encounter empty line.
+    for (bool IsFirst = true;; IsFirst = false) {
+      if (CurLine == Lines.end())
+        return;
+      StringRef FunctionName = *CurLine++;
+      if (FunctionName.empty())
+        break;
+      // Add indentation for lines after the first; we use 3 spaces, because
+      // currently that aligns with the expected indentation that will be added
+      // to the first line by Debugify.
+      if (!IsFirst)
+        OS << "   ";
+      OS << format_ptr(AddressList[i]) << ' ';
+      if (!FunctionName.starts_with("??"))
+        OS << FunctionName << ' ';
+      if (CurLine == Lines.end()) {
+        OS << '\n';
+        return;
+      }
+      StringRef FileLineInfo = *CurLine++;
+      if (!FileLineInfo.starts_with("??"))
+        OS << FileLineInfo;
+      else
+        OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
+      OS << '\n';
+    }
+  }
+  return;
+}
+#endif
+
 static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
 
 LLVM_ATTRIBUTE_USED
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 298fde1a387cc5..d5b11a08ead835 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -499,6 +499,21 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
   return 0;
 }
 
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+#if !defined(HAVE_BACKTRACE)
+#error DebugLoc origin-tracking currently requires `backtrace()`.
+#endif
+namespace llvm {
+namespace sys {
+/// Fills \p StackTrace by calling `backtrace()` with room for \p MaxDepth
+/// entries.
+///
+/// \returns whatever `backtrace()` returns for that buffer.
+template <unsigned long MaxDepth>
+int getStackTrace(std::array<void *, MaxDepth> &StackTrace) {
+  void **const Frames = StackTrace.data();
+  return backtrace(Frames, MaxDepth);
+}
+// Explicitly instantiate the MaxDepth == 16 version of the template.
+template int getStackTrace<16ul>(std::array<void *, 16ul> &);
+} // namespace sys
+} // namespace llvm
+#endif
+
 /// If this is an ELF platform, we can find all loaded modules and their virtual
 /// addresses with dl_iterate_phdr.
 static bool findModulesAndOffsets(void **StackTrace, int Depth,
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index 29ebf7c696e04f..c35ed744244d51 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -9,6 +9,7 @@
 // This file provides the Win32 specific implementation of the Signals class.
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ExitCodes.h"
 #include "llvm/Support/FileSystem.h"
@@ -538,6 +539,10 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
 extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord);
 #endif
 
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+#error DebugLoc origin-tracking currently unimplemented for Windows.
+#endif
+
 static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) {
   STACKFRAME64 StackFrame{};
   CONTEXT Context{};
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index f9f85d05ab45c5..3467f3482a1e62 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -15,7 +15,10 @@
 
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/InstIterator.h"
@@ -28,6 +31,11 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/JSON.h"
 #include <optional>
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+// We need the Signals header to operate on stacktraces if we're using DebugLoc
+// origin-tracking.
+#include "llvm/Support/Signals.h"
+#endif
 
 #define DEBUG_TYPE "debugify"
 
@@ -57,6 +65,49 @@ cl::opt<Level> DebugifyLevel(
 
 raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
 
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+// These maps refer to addresses in this instance of LLVM, so we can reuse them
+// everywhere - therefore, we store them at file scope.
+static DenseMap<void *, std::string> SymbolizedAddrs;
+static DenseSet<void *> UnsymbolizedAddrs;
+
+// Returns the symbolized origin stack traces recorded on I's DebugLoc as a
+// single string. Each frame is prefixed with its right-justified "#<index>",
+// and consecutive traces are separated by a line of '=' characters.
+std::string symbolizeStackTrace(const Instruction *I) {
+  // We flush the set of unsymbolized addresses at the latest possible moment,
+  // i.e. now.
+  if (!UnsymbolizedAddrs.empty()) {
+    sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs);
+    UnsymbolizedAddrs.clear();
+  }
+  // Bind by reference, as collectStackAddresses does: this function is called
+  // for each reported bug, so copying the whole trace list every time would be
+  // pure overhead.
+  const auto &OriginStackTraces = I->getDebugLoc().getOriginStackTraces();
+  std::string Result;
+  raw_string_ostream OS(Result);
+  for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) {
+    if (TraceIdx != 0)
+      OS << "========================================\n";
+    const auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx];
+    for (int Frame = 0; Frame < Depth; ++Frame) {
+      assert(SymbolizedAddrs.contains(StackTrace[Frame]) &&
+             "Expected each address to have been symbolized.");
+      // The width is computed inside the loop on purpose: it is only
+      // evaluated when Depth > 0, which keeps log10 away from zero.
+      OS << right_justify(formatv("#{0}", Frame).str(), std::log10(Depth) + 2)
+         << ' ' << SymbolizedAddrs[StackTrace[Frame]];
+    }
+  }
+  return Result;
+}
+// Queues every address from the origin stack traces on I's DebugLoc that has
+// no entry in SymbolizedAddrs yet; symbolizeStackTrace later symbolizes the
+// queued addresses in one batch.
+void collectStackAddresses(Instruction &I) {
+  auto &Traces = I.getDebugLoc().getOriginStackTraces();
+  for (auto &[Depth, StackTrace] : Traces) {
+    for (int FrameIdx = 0; FrameIdx < Depth; ++FrameIdx) {
+      void *FrameAddr = StackTrace[FrameIdx];
+      // Already symbolized on an earlier flush; nothing to queue.
+      if (SymbolizedAddrs.contains(FrameAddr))
+        continue;
+      UnsymbolizedAddrs.insert(FrameAddr);
+    }
+  }
+}
+#else
+void collectStackAddresses(Instruction &I) {}
+#endif // ENABLE_DEBUGLOC_ORIGIN_TRACKING
+
 uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
   return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0;
 }
@@ -374,6 +425,8 @@ bool llvm::collectDebugInfoMetadata(Module &M,
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
         DebugInfoBeforePass.InstToDelete.insert({&I, &I});
 
+        // Track the addresses to symbolize, if the feature is enabled.
+        collectStackAddresses(I);
         DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)});
       }
     }
@@ -449,14 +502,20 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
     auto BBName = BB->hasName() ? BB->getName() : "no-name";
     auto InstName = Instruction::getOpcodeName(Instr->getOpcode());
 
+    auto CreateJSONBugEntry = [&](const char *Action) {
+      Bugs.push_back(llvm::json::Object({
+        {"metadata", "DILocation"}, {"fn-name", FnName.str()},
+            {"bb-name", BBName.str()}, {"instr", InstName}, {"action", Action},
+#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
+            {"origin", symbolizeStackTrace(Instr)},
+#endif
+      }));
+    };
+
     auto InstrIt = DILocsBefore.find(Instr);
     if (InstrIt == DILocsBefore.end()) {
       if (ShouldWriteIntoJSON)
-        Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
-                                           {"fn-name", FnName.str()},
-                                           {"bb-name", BBName.str()},
-                                           {"instr", InstName},
-                                           {"action", "not-generate"}}));
+        CreateJSONBugEntry("not-generate");
       else
         dbg() << "WARNING: " << NameOfWrappedPass
               << " did not generate DILocation for " << *Instr
@@ -469,11 +528,7 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
       // If the instr had the !dbg attached before the pass, consider it as
       // a debug info issue.
       if (ShouldWriteIntoJSON)
-        Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"},
-                                           {"fn-name", FnName.str()},
-                                           {"bb-name", BBName.str()},
-                                           {"instr", InstName},
-                                           {"action", "drop"}}));
+        CreateJSONBugEntry("drop");
       else
         dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of "
               << *Instr << " (BB: " << BBName << ", Fn: " << FnName
@@ -617,6 +672,8 @@ bool llvm::checkDebugInfoMetadata(Module &M,
 
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
 
+        // Track the addresses to symbolize, if the feature is enabled.
+        collectStackAddresses(I);
         DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)});
       }
     }
diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py
index dc1fa518ca8e6d..a8c12252d972ca 100755
--- a/llvm/utils/llvm-original-di-preservation.py
+++ b/llvm/utils/llvm-original-di-preservation.py
@@ -13,14 +13,15 @@
 
 
 class DILocBug:
-    def __init__(self, action, bb_name, fn_name, instr):
+    def __init__(self, origin, action, bb_name, fn_name, instr):
+        # Value of the bug's "origin" JSON field, i.e. the symbolized origin
+        # stack trace text that debugify writes for the instruction.
+        self.origin = origin
         self.action = action
         self.bb_name = bb_name
         self.fn_name = fn_name
         self.instr = instr
 
     def __str__(self):
-        return self.action + self.bb_name + self.fn_name + self.instr
+        # Main() uses this string as the key when omitting duplicated bugs, so
+        # bugs that differ only in their origin are kept as separate entries.
+        return self.action + self.bb_name + self.fn_name + self.instr + self.origin
 
 
 class DISPBug:
@@ -86,6 +87,7 @@ def generate_html_report(
         "Function Name",
         "Basic Block Name",
         "Action",
+        "Origin",
     ]
 
     for column in header_di_loc:
@@ -112,6 +114,7 @@ def generate_html_report(
                 row.append(x.fn_name)
                 row.append(x.bb_name)
                 row.append(x.action)
+                # Escape the trace before embedding it in the report:
+                # symbolized names may contain '&', '<' or '>' (e.g. C++
+                # template arguments), which would otherwise be parsed as
+                # markup instead of being displayed.
+                escaped_origin = (
+                    x.origin.replace("&", "&amp;")
+                    .replace("<", "&lt;")
+                    .replace(">", "&gt;")
+                )
+                row.append(
+                    "<details><summary>View Origin StackTrace</summary>"
+                    f"<pre>{escaped_origin}</pre></details>"
+                )
                 row.append("    </tr>\n")
             # Dump the bugs info into the table.
             for column in row:
@@ -428,9 +431,9 @@ def Main():
         sys.exit(1)
 
     # Use the defaultdict in order to make multidim dicts.
-    di_location_bugs = defaultdict(lambda: defaultdict(dict))
-    di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
-    di_variable_bugs = defaultdict(lambda: defaultdict(dict))
+    di_location_bugs = defaultdict(lambda: defaultdict(list))
+    di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
+    di_variable_bugs = defaultdict(lambda: defaultdict(list))
 
     # Use the ordered dict to make a summary.
     di_location_bugs_summary = OrderedDict()
@@ -470,9 +473,9 @@ def Main():
                 skipped_lines += 1
                 continue
 
-            di_loc_bugs = []
-            di_sp_bugs = []
-            di_var_bugs = []
+            di_loc_bugs = di_location_bugs[bugs_file][bugs_pass]
+            di_sp_bugs = di_subprogram_bugs[bugs_file][bugs_pass]
+            di_var_bugs = di_variable_bugs[bugs_file][bugs_pass]
 
             # Omit duplicated bugs.
             di_loc_set = set()
@@ -487,6 +490,7 @@ def Main():
 
                 if bugs_metadata == "DILocation":
                     try:
+                        # "origin" is only written by builds with DebugLoc
+                        # origin-tracking enabled. Default to an empty trace
+                        # so reports from other builds are not discarded by
+                        # the KeyError handler below.
+                        origin = bug.get("origin", "")
                         action = bug["action"]
                         bb_name = bug["bb-name"]
                         fn_name = bug["fn-name"]
@@ -494,7 +498,7 @@ def Main():
                     except:
                         skipped_bugs += 1
                         continue
-                    di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
+                    di_loc_bug = DILocBug(origin, action, bb_name, fn_name, instr)
                     if not str(di_loc_bug) in di_loc_set:
                         di_loc_set.add(str(di_loc_bug))
                         if opts.compress:



More information about the cfe-commits mailing list