[clang] [llvm] [DLCov 2/5] Implement DebugLoc coverage tracking (PR #107279)

via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 4 11:05:31 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Stephen Tozer (SLTozer)

<details>
<summary>Changes</summary>

This is part of a series of patches that tries to improve DILocation bug detection in Debugify; see below for more details. This is the patch that adds the main feature, adding a set of `DebugLoc::get<Kind>` functions that can be used for instructions with intentionally empty DebugLocs to prevent Debugify from treating them as bugs, removing the currently-pervasive false positives and allowing us to use Debugify (in its original DI preservation mode) to reliably detect existing bugs and regressions. This patch does not add uses of these functions, except for once in Clang before optimizations, and in `Instruction::dropLocation()`, since that is an obvious case that immediately removes a set of false positives.

--------------------

This series of patches adds a "DebugLoc coverage tracking" feature, that inserts conditionally-compiled tracking information into DebugLocs (and by extension, to Instructions), which is used by Debugify to provide more accurate and detailed coverage reports. When enabled, this features tracks whether and why we have intentionally dropped a DebugLoc, allowing Debugify to ignore false positives. An optional additional feature allows also storing a stack trace of the point where a DebugLoc was unintentionally dropped/not generated, which is used to make fixing detected errors significantly easier. The goal of these features is to provide useful tools for developers to fix existing DebugLoc errors and allow reliable detection of regressions by either manual inspection or an automated script. 

Previous: https://github.com/llvm/llvm-project/pull/107278

---
Full diff: https://github.com/llvm/llvm-project/pull/107279.diff


10 Files Affected:

- (modified) clang/lib/CodeGen/BackendUtil.cpp (+16) 
- (modified) llvm/CMakeLists.txt (+4) 
- (modified) llvm/cmake/modules/HandleLLVMOptions.cmake (+12) 
- (modified) llvm/docs/CMake.rst (+11) 
- (modified) llvm/include/llvm/Config/config.h.cmake (+4) 
- (modified) llvm/include/llvm/IR/DebugLoc.h (+73-1) 
- (modified) llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (+5) 
- (modified) llvm/lib/IR/DebugInfo.cpp (+2-2) 
- (modified) llvm/lib/IR/DebugLoc.cpp (+16) 
- (modified) llvm/lib/Transforms/Utils/Debugify.cpp (+12-7) 


``````````diff
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index e765bbf637a661..20653daff7d4ae 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -911,6 +911,22 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
       Debugify.setOrigDIVerifyBugsReportFilePath(
           CodeGenOpts.DIBugsReportFilePath);
     Debugify.registerCallbacks(PIC, MAM);
+
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+    // If we're using debug location coverage tracking, mark all the
+    // instructions coming out of the frontend without a DebugLoc as being
+    // intentional line-zero locations, to prevent both those instructions and
+    // new instructions that inherit their location from being treated as
+    // incorrectly empty locations.
+    for (Function &F : *TheModule) {
+      if (!F.getSubprogram())
+        continue;
+      for (BasicBlock &BB : F)
+        for (Instruction &I : BB)
+          if (!I.getDebugLoc())
+            I.setDebugLoc(DebugLoc::getLineZero());
+    }
+#endif
   }
   // Attempt to load pass plugins and register their callbacks with PB.
   for (auto &PluginFN : CodeGenOpts.PassPlugins) {
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 12618966c4adfd..3e2e90f5adad2e 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -524,6 +524,10 @@ endif()
 
 option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." OFF)
 
+set(LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING "DISABLED" CACHE STRING
+  "Enhance debugify's line number coverage tracking; enabling this is abi-breaking. Can be DISABLED, COVERAGE, or COVERAGE_AND_ORIGIN.")
+set_property(CACHE LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING PROPERTY STRINGS DISABLED COVERAGE COVERAGE_AND_ORIGIN)
+
 set(WINDOWS_PREFER_FORWARD_SLASH_DEFAULT OFF)
 if (MINGW)
   # Cygwin doesn't identify itself as Windows, and thus gets path::Style::posix
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 5ca580fbb59c59..a4b11c149da9de 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -196,6 +196,18 @@ else()
   message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!")
 endif()
 
+string(TOUPPER "${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}" uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING)
+
+if( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE" )
+  set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 )
+elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE_AND_ORIGIN" )
+  message(FATAL_ERROR "\"COVERAGE_AND_ORIGIN\" setting for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING currently unimplemented.")
+elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "DISABLED" OR NOT DEFINED LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING )
+  # The DISABLED setting is default and requires no additional defines.
+else()
+  message(FATAL_ERROR "Unknown value for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING: \"${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}\"!")
+endif()
+
 if( LLVM_REVERSE_ITERATION )
   set( LLVM_ENABLE_REVERSE_ITERATION 1 )
 endif()
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index 2a80813999ea1e..304e22759770d9 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -475,6 +475,17 @@ enabled sub-projects. Nearly all of these variable names begin with
 **LLVM_ENABLE_BINDINGS**:BOOL
   If disabled, do not try to build the OCaml bindings.
 
+**LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING**:STRING
+  Enhances Debugify's ability to detect line number errors by storing extra
+  information inside Instructions, removing false positives from Debugify's
+  results at the cost of performance. Allowed values are `DISABLED` (default),
+  `COVERAGE`, and `COVERAGE_AND_ORIGIN`. `COVERAGE` tracks whether and why a
+  line number was intentionally dropped or not generated for an instruction,
+  allowing Debugify to avoid reporting these as errors. `COVERAGE_AND_ORIGIN`
+  additionally stores a stacktrace of the point where each DebugLoc is
+  unintentionally dropped, allowing for much easier bug triaging at the cost of
+  a ~10x performance slowdown.
+
 **LLVM_ENABLE_DIA_SDK**:BOOL
   Enable building with MSVC DIA SDK for PDB debugging support. Available
   only with MSVC. Defaults to ON.
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index ff30741c8f360a..388ce1e8f74e3e 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -19,6 +19,10 @@
 /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */
 #cmakedefine01 LLVM_ENABLE_CRASH_DUMPS
 
+/* Define to 1 to enable expensive checks for debug location coverage checking,
+   and to 0 otherwise. */
+#cmakedefine01 ENABLE_DEBUGLOC_COVERAGE_TRACKING
+
 /* Define to 1 to prefer forward slashes on Windows, and to 0 prefer
    backslashes. */
 #cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH
diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h
index c22d3e9b10d27f..ae5f9d72c97e26 100644
--- a/llvm/include/llvm/IR/DebugLoc.h
+++ b/llvm/include/llvm/IR/DebugLoc.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_IR_DEBUGLOC_H
 #define LLVM_IR_DEBUGLOC_H
 
+#include "llvm/Config/config.h"
 #include "llvm/IR/TrackingMDRef.h"
 #include "llvm/Support/DataTypes.h"
 
@@ -22,6 +23,67 @@ namespace llvm {
   class LLVMContext;
   class raw_ostream;
   class DILocation;
+  class Function;
+
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+  // Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc
+  // is either ordinary, containing a valid DILocation, or otherwise describing
+  // the reason why the DebugLoc does not contain a valid DILocation.
+  enum class DebugLocKind : uint8_t {
+    // DebugLoc is expected to contain a valid DILocation.
+    Normal,
+    // DebugLoc intentionally does not have a valid DILocation; may be for a
+    // compiler-generated instruction, or an explicitly dropped location.
+    LineZero,
+    // DebugLoc does not have a known or currently knowable source location,
+    // e.g. the attribution is ambiguous in a way that can't be represented, or
+    // determining the correct location is complicated and requires future
+    // developer effort.
+    Unknown,
+    // DebugLoc is attached to an instruction that we don't expect to be
+    // emitted, and so can omit a valid DILocation; we don't expect to ever try
+    // and emit these into the line table, and trying to do so is a sign that
+    // something has gone wrong (most likely a DebugLoc leaking from a transient
+    // compiler-generated instruction).
+    Temporary
+  };
+
+  // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify
+  // to ignore intentionally-empty DebugLocs.
+  class DILocAndCoverageTracking : public TrackingMDNodeRef {
+  public:
+    DebugLocKind Kind;
+    // Default constructor for empty DebugLocs.
+    DILocAndCoverageTracking()
+        : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {}
+    // Valid or nullptr MDNode*, normal DebugLocKind.
+    DILocAndCoverageTracking(const MDNode *Loc)
+        : TrackingMDNodeRef(const_cast<MDNode *>(Loc)),
+          Kind(DebugLocKind::Normal) {}
+    DILocAndCoverageTracking(const DILocation *Loc);
+    // Explicit DebugLocKind, which always means a nullptr MDNode*.
+    DILocAndCoverageTracking(DebugLocKind Kind)
+        : TrackingMDNodeRef(nullptr), Kind(Kind) {}
+  };
+  template <> struct simplify_type<DILocAndCoverageTracking> {
+    using SimpleType = MDNode *;
+
+    static MDNode *getSimplifiedValue(DILocAndCoverageTracking &MD) {
+      return MD.get();
+    }
+  };
+  template <> struct simplify_type<const DILocAndCoverageTracking> {
+    using SimpleType = MDNode *;
+
+    static MDNode *getSimplifiedValue(const DILocAndCoverageTracking &MD) {
+      return MD.get();
+    }
+  };
+
+  using DebugLocTrackingRef = DILocAndCoverageTracking;
+#else
+  using DebugLocTrackingRef = TrackingMDNodeRef;
+#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING
 
   /// A debug info location.
   ///
@@ -31,7 +93,8 @@ namespace llvm {
   /// To avoid extra includes, \a DebugLoc doubles the \a DILocation API with a
   /// one based on relatively opaque \a MDNode pointers.
   class DebugLoc {
-    TrackingMDNodeRef Loc;
+
+    DebugLocTrackingRef Loc;
 
   public:
     DebugLoc() = default;
@@ -47,6 +110,15 @@ namespace llvm {
     /// IR.
     explicit DebugLoc(const MDNode *N);
 
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+    DebugLoc(DebugLocKind Kind) : Loc(Kind) {}
+    DebugLocKind getKind() const { return Loc.Kind; }
+#endif
+
+    static DebugLoc getTemporary();
+    static DebugLoc getUnknown();
+    static DebugLoc getLineZero();
+
     /// Get the underlying \a DILocation.
     ///
     /// \pre !*this or \c isa<DILocation>(getAsMDNode()).
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index f88653146cc6ff..4ba8262259b112 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -31,6 +31,7 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/config.h"
 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
 #include "llvm/IR/Constants.h"
@@ -2080,6 +2081,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   }
 
   if (!DL) {
+    // FIXME: We could assert that `DL.getKind() != DebugLocKind::Temporary`
+    // here, or otherwise record any temporary DebugLocs seen to ensure that
+    // transient compiler-generated instructions aren't leaking their DLs to
+    // other instructions.
     // We have an unspecified location, which might want to be line 0.
     // If we have already emitted a line-0 record, don't repeat it.
     if (LastAsmLine == 0)
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 7fa1f9696d43b2..86ac46540c5ef9 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -979,7 +979,7 @@ void Instruction::dropLocation() {
   }
 
   if (!MayLowerToCall) {
-    setDebugLoc(DebugLoc());
+    setDebugLoc(DebugLoc::getLineZero());
     return;
   }
 
@@ -998,7 +998,7 @@ void Instruction::dropLocation() {
     //
     // One alternative is to set a line 0 location with the existing scope and
     // inlinedAt info. The location might be sensitive to when inlining occurs.
-    setDebugLoc(DebugLoc());
+    setDebugLoc(DebugLoc::getLineZero());
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp
index bdea52180f74ae..501eafd0175b7b 100644
--- a/llvm/lib/IR/DebugLoc.cpp
+++ b/llvm/lib/IR/DebugLoc.cpp
@@ -11,6 +11,22 @@
 #include "llvm/IR/DebugInfo.h"
 using namespace llvm;
 
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L)
+    : TrackingMDNodeRef(const_cast<DILocation *>(L)),
+      Kind(DebugLocKind::Normal) {}
+
+DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); }
+DebugLoc DebugLoc::getUnknown() { return DebugLoc(DebugLocKind::Unknown); }
+DebugLoc DebugLoc::getLineZero() { return DebugLoc(DebugLocKind::LineZero); }
+
+#else
+
+DebugLoc DebugLoc::getTemporary() { return DebugLoc(); }
+DebugLoc DebugLoc::getUnknown() { return DebugLoc(); }
+DebugLoc DebugLoc::getLineZero() { return DebugLoc(); }
+#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING
+
 //===----------------------------------------------------------------------===//
 // DebugLoc Implementation
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index fcc82eadac36cf..f9f85d05ab45c5 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -292,6 +292,16 @@ bool llvm::stripDebugifyMetadata(Module &M) {
   return Changed;
 }
 
+bool hasLoc(const Instruction &I) {
+  const DILocation *Loc = I.getDebugLoc().get();
+#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
+  DebugLocKind Kind = I.getDebugLoc().getKind();
+  return Loc || Kind != DebugLocKind::Normal;
+#else
+  return Loc;
+#endif
+}
+
 bool llvm::collectDebugInfoMetadata(Module &M,
                                     iterator_range<Module::iterator> Functions,
                                     DebugInfoPerPass &DebugInfoBeforePass,
@@ -364,9 +374,7 @@ bool llvm::collectDebugInfoMetadata(Module &M,
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
         DebugInfoBeforePass.InstToDelete.insert({&I, &I});
 
-        const DILocation *Loc = I.getDebugLoc().get();
-        bool HasLoc = Loc != nullptr;
-        DebugInfoBeforePass.DILocations.insert({&I, HasLoc});
+        DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)});
       }
     }
   }
@@ -609,10 +617,7 @@ bool llvm::checkDebugInfoMetadata(Module &M,
 
         LLVM_DEBUG(dbgs() << "  Collecting info for inst: " << I << '\n');
 
-        const DILocation *Loc = I.getDebugLoc().get();
-        bool HasLoc = Loc != nullptr;
-
-        DebugInfoAfterPass.DILocations.insert({&I, HasLoc});
+        DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)});
       }
     }
   }

``````````

</details>


https://github.com/llvm/llvm-project/pull/107279


More information about the cfe-commits mailing list