[llvm] [Debugify] Improve reduction of debugify coverage build output (PR #150212)
Stephen Tozer via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 23 06:36:12 PDT 2025
https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/150212
>From 091164ecdb1490eae79d7390d6a3d0135fb660ca Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 23 Jul 2025 11:57:37 +0100
Subject: [PATCH 1/3] [Debugify] Improve reduction of debugify coverage build
output
In current DebugLoc coverage builds, the output for any reasonably large
build can become very large if any missing DebugLocs are present; this
happens because single errors in LLVM may result in many errors being
reported in the output report. The main cause of this is that the empty
locations attached to instructions may be propagated to other instructions
in later passes, which will each be reported as new errors. This patch
prevents this by adding an "unknown" annotation to instructions after
reporting them once, ensuring that any other DebugLocs copied or derived
from the original empty location will not be marked as new errors.
As a separate but related change, this patch updates the report generation
script to deduplicate results using the recorded stacktrace if it is
available, instead of the pass+instruction combination. This reduces the
size of the reduction, but makes the reduction highly reliable, as the
stacktrace allows us to very precisely identify when two bugs have
originated from the same place.
---
llvm/lib/Transforms/Utils/Debugify.cpp | 9 ++++
llvm/utils/llvm-original-di-preservation.py | 50 ++++++++++++++-------
2 files changed, 42 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 4210ce6da1eb2..39ff6e94510fb 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -707,6 +707,15 @@ bool llvm::checkDebugInfoMetadata(Module &M,
DILocsBefore, DILocsAfter, InstToDelete, NameOfWrappedPass,
FileNameFromCU, ShouldWriteIntoJSON, Bugs);
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE
+ // If we are tracking DebugLoc coverage, replace each empty DebugLoc with an
+ // annotated location now so that it does not show up in future passes even if
+ // it is propagated to other instructions.
+ for (const auto &L : DILocsAfter) {
+ if (!L.second)
+ L.first->setDebugLoc(DebugLoc::getUnknown());
+#endif
+
bool ResultForVars = checkVars(DIVarsBefore, DIVarsAfter, NameOfWrappedPass,
FileNameFromCU, ShouldWriteIntoJSON, Bugs);
diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py
index b5ccd7a3224f8..cdca2ec8153b8 100755
--- a/llvm/utils/llvm-original-di-preservation.py
+++ b/llvm/utils/llvm-original-di-preservation.py
@@ -6,6 +6,7 @@
from __future__ import print_function
import argparse
import os
+import re
import sys
from json import loads
from collections import defaultdict
@@ -22,6 +23,14 @@ def __init__(self, origin, action, bb_name, fn_name, instr):
def key(self):
return self.action + self.bb_name + self.fn_name + self.instr
+ def reduced_key(self, bug_pass):
+ if self.origin is not None:
+ # If we have the origin stacktrace available, we can use it to efficiently deduplicate identical errors. We
+ # just need to remove the pointer values from the string first, so that we can deduplicate across files.
+ origin_no_addr = re.sub(r"0x[0-9a-fA-F]+", "", self.origin)
+ return origin_no_addr
+ return bug_pass + self.instr
+
def to_dict(self):
result = {
"instr": self.instr,
@@ -42,6 +51,9 @@ def __init__(self, action, fn_name):
def key(self):
return self.action + self.fn_name
+ def reduced_key(self, bug_pass):
+ return bug_pass + self.fn_name
+
def to_dict(self):
return {
"fn_name": self.fn_name,
@@ -58,6 +70,9 @@ def __init__(self, action, name, fn_name):
def key(self):
return self.action + self.name + self.fn_name
+ def reduced_key(self, bug_pass):
+ return bug_pass + self.name
+
def to_dict(self):
return {
"fn_name": self.fn_name,
@@ -478,7 +493,11 @@ def get_json_chunk(file, start, size):
# Parse the program arguments.
def parse_program_args(parser):
parser.add_argument("file_name", type=str, help="json file to process")
- parser.add_argument("--reduce", action="store_true", help="create reduced report")
+ parser.add_argument(
+ "--reduce",
+ action="store_true",
+ help="create reduced report by deduplicating bugs within and across files",
+ )
report_type_group = parser.add_mutually_exclusive_group(required=True)
report_type_group.add_argument(
@@ -523,13 +542,10 @@ def Main():
di_sp_bugs_summary = OrderedDict()
di_var_bugs_summary = OrderedDict()
- # Compress similar bugs.
- # DILocBugs with same pass & instruction name.
- di_loc_pass_instr_set = set()
- # DISPBugs with same pass & function name.
- di_sp_pass_fn_set = set()
- # DIVarBugs with same pass & variable name.
- di_var_pass_var_set = set()
+ # If we are using --reduce, use these sets to deduplicate similar bugs within and across files.
+ di_loc_reduced_set = set()
+ di_sp_reduced_set = set()
+ di_var_reduced_set = set()
start_line = 0
chunk_size = 1000000
@@ -585,9 +601,9 @@ def Main():
if not di_loc_bug.key() in di_loc_set:
di_loc_set.add(di_loc_bug.key())
if opts.reduce:
- pass_instr = bugs_pass + instr
- if not pass_instr in di_loc_pass_instr_set:
- di_loc_pass_instr_set.add(pass_instr)
+ reduced_key = di_loc_bug.reduced_key(bugs_pass)
+ if not reduced_key in di_loc_reduced_set:
+ di_loc_reduced_set.add(reduced_key)
di_loc_bugs.append(di_loc_bug)
else:
di_loc_bugs.append(di_loc_bug)
@@ -608,9 +624,9 @@ def Main():
if not di_sp_bug.key() in di_sp_set:
di_sp_set.add(di_sp_bug.key())
if opts.reduce:
- pass_fn = bugs_pass + name
- if not pass_fn in di_sp_pass_fn_set:
- di_sp_pass_fn_set.add(pass_fn)
+ reduced_key = di_sp_bug.reduced_key(bugs_pass)
+ if not reduced_key in di_sp_reduced_set:
+ di_sp_reduced_set.add(reduced_key)
di_sp_bugs.append(di_sp_bug)
else:
di_sp_bugs.append(di_sp_bug)
@@ -632,9 +648,9 @@ def Main():
if not di_var_bug.key() in di_var_set:
di_var_set.add(di_var_bug.key())
if opts.reduce:
- pass_var = bugs_pass + name
- if not pass_var in di_var_pass_var_set:
- di_var_pass_var_set.add(pass_var)
+ reduced_key = di_var_bug.reduced_key(bugs_pass)
+ if not reduced_key in di_var_reduced_set:
+ di_var_reduced_set.add(reduced_key)
di_var_bugs.append(di_var_bug)
else:
di_var_bugs.append(di_var_bug)
>From da861d32347ca5c237455619daf5134d7f410672 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 23 Jul 2025 12:58:30 +0100
Subject: [PATCH 2/3] Remove opening brace
---
llvm/lib/Transforms/Utils/Debugify.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 39ff6e94510fb..cf1bfd9be594b 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -711,7 +711,7 @@ bool llvm::checkDebugInfoMetadata(Module &M,
// If we are tracking DebugLoc coverage, replace each empty DebugLoc with an
// annotated location now so that it does not show up in future passes even if
// it is propagated to other instructions.
- for (const auto &L : DILocsAfter) {
+ for (const auto &L : DILocsAfter)
if (!L.second)
L.first->setDebugLoc(DebugLoc::getUnknown());
#endif
>From 563e681b16d62bc8922944ac8e89cc4b225a81f5 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Wed, 23 Jul 2025 14:35:55 +0100
Subject: [PATCH 3/3] Remove const
---
llvm/lib/Transforms/Utils/Debugify.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index cf1bfd9be594b..61af3a6f7d903 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -711,7 +711,7 @@ bool llvm::checkDebugInfoMetadata(Module &M,
// If we are tracking DebugLoc coverage, replace each empty DebugLoc with an
// annotated location now so that it does not show up in future passes even if
// it is propagated to other instructions.
- for (const auto &L : DILocsAfter)
+ for (auto &L : DILocsAfter)
if (!L.second)
L.first->setDebugLoc(DebugLoc::getUnknown());
#endif
More information about the llvm-commits
mailing list