[llvm] 8b38a2c - [Debugify][OriginalDIMode] Update script to handle large JSON reports
Djordje Todorovic via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 29 07:48:38 PDT 2022
Author: Nikola Tesic
Date: 2022-09-29T16:48:06+02:00
New Revision: 8b38a2c0a55a9140115a91959326918d99bea435
URL: https://github.com/llvm/llvm-project/commit/8b38a2c0a55a9140115a91959326918d99bea435
DIFF: https://github.com/llvm/llvm-project/commit/8b38a2c0a55a9140115a91959326918d99bea435.diff
LOG: [Debugify][OriginalDIMode] Update script to handle large JSON reports
This patch updates llvm/utils/llvm-original-di-preservation.py to create more
compact HTML verify-debuginfo-preserve reports by:
- removing duplicated debug info bugs,
- introducing -compress option to create highly compressed report.
Additionally, this patch makes script able to process very large JSON inputs.
That is done by reading & analyzing JSON report in chunks.
Differential Revision: https://reviews.llvm.org/D115617
Added:
llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html
Modified:
llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
llvm/test/tools/llvm-original-di-preservation/basic.test
llvm/utils/llvm-original-di-preservation.py
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html
new file mode 100644
index 000000000000..43f9990a0916
--- /dev/null
+++ b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html
@@ -0,0 +1,110 @@
+ <html>
+ <head>
+ <style>
+ table, th, td {
+ border: 1px solid black;
+ }
+ table.center {
+ margin-left: auto;
+ margin-right: auto;
+ }
+ </style>
+ </head>
+ <body>
+ <table>
+ <caption><b>Location Bugs found by the Debugify</b></caption>
+ <tr>
+ <th>File</th>
+ <th>LLVM Pass Name</th>
+ <th>LLVM IR Instruction</th>
+ <th>Function Name</th>
+ <th>Basic Block Name</th>
+ <th>Action</th>
+ </tr>
+ </tr>
+ <tr>
+ <td>test.ll</td>
+ <td>no-name</td>
+ <td>extractvalue</td>
+ <td>fn</td>
+ <td>no-name</td>
+ <td>not-generate</td>
+ </tr>
+ <tr>
+ <td>test.ll</td>
+ <td>no-name</td>
+ <td>insertvalue</td>
+ <td>fn</td>
+ <td>no-name</td>
+ <td>not-generate</td>
+ </tr>
+ <tr>
+</table>
+<br>
+<table>
+ <caption><b>Summary of Location Bugs</b></caption>
+ <tr>
+ <th>LLVM Pass Name</th>
+ <th>Number of bugs</th>
+ </tr>
+ <tr>
+ <td>no-name</td>
+ <td>8</td>
+ </tr>
+ <tr>
+</table>
+<br>
+<br>
+<table>
+ <caption><b>SP Bugs found by the Debugify</b></caption>
+ <tr>
+ <th>File</th>
+ <th>LLVM Pass Name</th>
+ <th>Function Name</th>
+ <th>Action</th>
+ </tr>
+<tr>
+ <td colspan='4'> No bugs found </td>
+ </tr>
+ </table>
+<br>
+<table>
+ <caption><b>Summary of SP Bugs</b></caption>
+ <tr>
+ <th>LLVM Pass Name</th>
+ <th>Number of bugs</th>
+ </tr>
+ <tr>
+<tr>
+ <td colspan='2'> No bugs found </td>
+ </tr>
+ </table>
+<br>
+<br>
+<table>
+ <caption><b>Variable Location Bugs found by the Debugify</b></caption>
+ <tr>
+ <th>File</th>
+ <th>LLVM Pass Name</th>
+ <th>Variable</th>
+ <th>Function</th>
+ <th>Action</th>
+ </tr>
+<tr>
+ <td colspan='4'> No bugs found </td>
+ </tr>
+ </table>
+<br>
+<table>
+ <caption><b>Summary of Variable Location Bugs</b></caption>
+ <tr>
+ <th>LLVM Pass Name</th>
+ <th>Number of bugs</th>
+ </tr>
+ <tr>
+<tr>
+ <td colspan='2'> No bugs found </td>
+ </tr>
+ </table>
+</body>
+ </html>
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
index 6fc1b69f7071..c861d3a6adf6 100644
--- a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
+++ b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
@@ -41,22 +41,6 @@
<tr>
<td>test.ll</td>
<td>no-name</td>
- <td>extractvalue</td>
- <td>fn</td>
- <td>no-name</td>
- <td>not-generate</td>
- </tr>
- <tr>
- <td>test.ll</td>
- <td>no-name</td>
- <td>insertvalue</td>
- <td>fn1</td>
- <td>no-name</td>
- <td>not-generate</td>
- </tr>
- <tr>
- <td>test.ll</td>
- <td>no-name</td>
<td>insertvalue</td>
<td>fn1</td>
<td>no-name</td>
@@ -65,22 +49,6 @@
<tr>
<td>test.ll</td>
<td>no-name</td>
- <td>insertvalue</td>
- <td>fn</td>
- <td>no-name</td>
- <td>not-generate</td>
- </tr>
- <tr>
- <td>test.ll</td>
- <td>no-name</td>
- <td>extractvalue</td>
- <td>fn1</td>
- <td>no-name</td>
- <td>not-generate</td>
- </tr>
- <tr>
- <td>test.ll</td>
- <td>no-name</td>
<td>extractvalue</td>
<td>fn1</td>
<td>no-name</td>
diff --git a/llvm/test/tools/llvm-original-di-preservation/basic.test b/llvm/test/tools/llvm-original-di-preservation/basic.test
index 12292f209fe7..81f987aa221b 100644
--- a/llvm/test/tools/llvm-original-di-preservation/basic.test
+++ b/llvm/test/tools/llvm-original-di-preservation/basic.test
@@ -6,3 +6,8 @@ RUN: %llvm-original-di-preservation %p/Inputs/corrupted.json %t2.html | FileChec
RUN: diff -w %p/Inputs/expected-skipped.html %t2.html
CORRUPTED: Skipped lines: 3
CORRUPTED: Skipped bugs: 1
+
+RUN: %llvm-original-di-preservation -compress %p/Inputs/sample.json %t3.html | FileCheck %s -check-prefix=COMPRESSED
+RUN: diff -w %p/Inputs/expected-compressed.html %t3.html
+COMPRESSED-NOT: Skipped lines:
+
diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py
index 73d7d4bf90cf..5b53e6ad3d67 100755
--- a/llvm/utils/llvm-original-di-preservation.py
+++ b/llvm/utils/llvm-original-di-preservation.py
@@ -17,17 +17,23 @@ def __init__(self, action, bb_name, fn_name, instr):
self.bb_name = bb_name
self.fn_name = fn_name
self.instr = instr
+ def __str__(self):
+ return self.action + self.bb_name + self.fn_name + self.instr
class DISPBug:
def __init__(self, action, fn_name):
self.action = action
self.fn_name = fn_name
+ def __str__(self):
+ return self.action + self.fn_name
class DIVarBug:
def __init__(self, action, name, fn_name):
self.action = action
self.name = name
self.fn_name = fn_name
+ def __str__(self):
+ return self.action + self.name + self.fn_name
# Report the bugs in form of html.
def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \
@@ -326,11 +332,12 @@ def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \
print("The " + html_file + " generated.")
-# Read the JSON file.
-def get_json(file):
+# Read the JSON file in chunks.
+def get_json_chunk(file,start,size):
json_parsed = None
di_checker_data = []
skipped_lines = 0
+ line = 0
# The file contains json object per line.
# An example of the line (formatted json):
@@ -354,6 +361,11 @@ def get_json(file):
#}
with open(file) as json_objects_file:
for json_object_line in json_objects_file:
+ line += 1
+ if line < start:
+ continue
+ if line >= start+size:
+ break
try:
json_object = loads(json_object_line)
except:
@@ -361,12 +373,13 @@ def get_json(file):
else:
di_checker_data.append(json_object)
- return (di_checker_data, skipped_lines)
+ return (di_checker_data, skipped_lines, line)
# Parse the program arguments.
def parse_program_args(parser):
parser.add_argument("file_name", type=str, help="json file to process")
parser.add_argument("html_file", type=str, help="html file to output data")
+ parser.add_argument("-compress", action="store_true", help="create reduced html report")
return parser.parse_args()
@@ -378,8 +391,6 @@ def Main():
print ("error: The output file must be '.html'.")
sys.exit(1)
- (debug_info_bugs, skipped_lines) = get_json(opts.file_name)
-
# Use the defaultdict in order to make multidim dicts.
di_location_bugs = defaultdict(lambda: defaultdict(dict))
di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
@@ -390,81 +401,132 @@ def Main():
di_sp_bugs_summary = OrderedDict()
di_var_bugs_summary = OrderedDict()
+ # Compress similar bugs.
+ # DILocBugs with same pass & instruction name.
+ di_loc_pass_instr_set = set()
+ # DISPBugs with same pass & function name.
+ di_sp_pass_fn_set = set()
+ # DIVarBugs with same pass & variable name.
+ di_var_pass_var_set = set()
+
+ start_line = 0
+ chunk_size = 1000000
+ end_line = chunk_size - 1
+ skipped_lines = 0
skipped_bugs = 0
- # Map the bugs into the file-pass pairs.
- for bugs_per_pass in debug_info_bugs:
- try:
- bugs_file = bugs_per_pass["file"]
- bugs_pass = bugs_per_pass["pass"]
- bugs = bugs_per_pass["bugs"][0]
- except:
- skipped_lines += 1
- continue
-
- di_loc_bugs = []
- di_sp_bugs = []
- di_var_bugs = []
-
- for bug in bugs:
+ # Process each chunk of 1 million JSON lines.
+ while True:
+ if start_line > end_line:
+ break
+ (debug_info_bugs, skipped, end_line) = get_json_chunk(opts.file_name,start_line,chunk_size)
+ start_line += chunk_size
+ skipped_lines += skipped
+
+ # Map the bugs into the file-pass pairs.
+ for bugs_per_pass in debug_info_bugs:
try:
- bugs_metadata = bug["metadata"]
+ bugs_file = bugs_per_pass["file"]
+ bugs_pass = bugs_per_pass["pass"]
+ bugs = bugs_per_pass["bugs"][0]
except:
- skipped_bugs += 1
+ skipped_lines += 1
continue
- if bugs_metadata == "DILocation":
- try:
- action = bug["action"]
- bb_name = bug["bb-name"]
- fn_name = bug["fn-name"]
- instr = bug["instr"]
- except:
- skipped_bugs += 1
- continue
- di_loc_bugs.append(DILocBug(action, bb_name, fn_name, instr))
+ di_loc_bugs = []
+ di_sp_bugs = []
+ di_var_bugs = []
- # Fill the summary dict.
- if bugs_pass in di_location_bugs_summary:
- di_location_bugs_summary[bugs_pass] += 1
- else:
- di_location_bugs_summary[bugs_pass] = 1
- elif bugs_metadata == "DISubprogram":
+ # Omit duplicated bugs.
+ di_loc_set = set()
+ di_sp_set = set()
+ di_var_set = set()
+ for bug in bugs:
try:
- action = bug["action"]
- name = bug["name"]
+ bugs_metadata = bug["metadata"]
except:
skipped_bugs += 1
continue
- di_sp_bugs.append(DISPBug(action, name))
- # Fill the summary dict.
- if bugs_pass in di_sp_bugs_summary:
- di_sp_bugs_summary[bugs_pass] += 1
+ if bugs_metadata == "DILocation":
+ try:
+ action = bug["action"]
+ bb_name = bug["bb-name"]
+ fn_name = bug["fn-name"]
+ instr = bug["instr"]
+ except:
+ skipped_bugs += 1
+ continue
+ di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
+ if not str(di_loc_bug) in di_loc_set:
+ di_loc_set.add(str(di_loc_bug))
+ if opts.compress:
+ pass_instr = bugs_pass + instr
+ if not pass_instr in di_loc_pass_instr_set:
+ di_loc_pass_instr_set.add(pass_instr)
+ di_loc_bugs.append(di_loc_bug)
+ else:
+ di_loc_bugs.append(di_loc_bug)
+
+ # Fill the summary dict.
+ if bugs_pass in di_location_bugs_summary:
+ di_location_bugs_summary[bugs_pass] += 1
+ else:
+ di_location_bugs_summary[bugs_pass] = 1
+ elif bugs_metadata == "DISubprogram":
+ try:
+ action = bug["action"]
+ name = bug["name"]
+ except:
+ skipped_bugs += 1
+ continue
+ di_sp_bug = DISPBug(action, name)
+ if not str(di_sp_bug) in di_sp_set:
+ di_sp_set.add(str(di_sp_bug))
+ if opts.compress:
+ pass_fn = bugs_pass + name
+ if not pass_fn in di_sp_pass_fn_set:
+ di_sp_pass_fn_set.add(pass_fn)
+ di_sp_bugs.append(di_sp_bug)
+ else:
+ di_sp_bugs.append(di_sp_bug)
+
+ # Fill the summary dict.
+ if bugs_pass in di_sp_bugs_summary:
+ di_sp_bugs_summary[bugs_pass] += 1
+ else:
+ di_sp_bugs_summary[bugs_pass] = 1
+ elif bugs_metadata == "dbg-var-intrinsic":
+ try:
+ action = bug["action"]
+ fn_name = bug["fn-name"]
+ name = bug["name"]
+ except:
+ skipped_bugs += 1
+ continue
+ di_var_bug = DIVarBug(action, name, fn_name)
+ if not str(di_var_bug) in di_var_set:
+ di_var_set.add(str(di_var_bug))
+ if opts.compress:
+ pass_var = bugs_pass + name
+ if not pass_var in di_var_pass_var_set:
+ di_var_pass_var_set.add(pass_var)
+ di_var_bugs.append(di_var_bug)
+ else:
+ di_var_bugs.append(di_var_bug)
+
+ # Fill the summary dict.
+ if bugs_pass in di_var_bugs_summary:
+ di_var_bugs_summary[bugs_pass] += 1
+ else:
+ di_var_bugs_summary[bugs_pass] = 1
else:
- di_sp_bugs_summary[bugs_pass] = 1
- elif bugs_metadata == "dbg-var-intrinsic":
- try:
- action = bug["action"]
- fn_name = bug["fn-name"]
- name = bug["name"]
- except:
+ # Unsupported metadata.
skipped_bugs += 1
continue
- di_var_bugs.append(DIVarBug(action, name, fn_name))
-
- # Fill the summary dict.
- if bugs_pass in di_var_bugs_summary:
- di_var_bugs_summary[bugs_pass] += 1
- else:
- di_var_bugs_summary[bugs_pass] = 1
- else:
- # Unsupported metadata.
- skipped_bugs += 1
- continue
- di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
- di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
- di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs
+ di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
+ di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
+ di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs
generate_html_report(di_location_bugs, di_subprogram_bugs, di_variable_bugs, \
di_location_bugs_summary, di_sp_bugs_summary, \
More information about the llvm-commits mailing list