[llvm] 8b38a2c - [Debugify][OriginalDIMode] Update script to handle large JSON reports

Djordje Todorovic via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 29 07:48:38 PDT 2022


Author: Nikola Tesic
Date: 2022-09-29T16:48:06+02:00
New Revision: 8b38a2c0a55a9140115a91959326918d99bea435

URL: https://github.com/llvm/llvm-project/commit/8b38a2c0a55a9140115a91959326918d99bea435
DIFF: https://github.com/llvm/llvm-project/commit/8b38a2c0a55a9140115a91959326918d99bea435.diff

LOG: [Debugify][OriginalDIMode] Update script to handle large JSON reports

This patch updates llvm/utils/llvm-original-di-preservation.py to create more
compact HTML verify-debuginfo-preserve reports by:
- removing duplicated debug info bugs,
- introducing a -compress option to create a highly compressed report.
Additionally, this patch makes the script able to process very large JSON inputs.
That is done by reading & analyzing the JSON report in chunks.

Differential Revision: https://reviews.llvm.org/D115617

Added: 
    llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html

Modified: 
    llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
    llvm/test/tools/llvm-original-di-preservation/basic.test
    llvm/utils/llvm-original-di-preservation.py

Removed: 
    


################################################################################
diff --git a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html
new file mode 100644
index 000000000000..43f9990a0916
--- /dev/null
+++ b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html
@@ -0,0 +1,110 @@
+ <html>
+  <head>
+  <style>
+  table, th, td {
+    border: 1px solid black;
+  }
+  table.center {
+    margin-left: auto;
+    margin-right: auto;
+  }
+  </style>
+  </head>
+  <body>
+  <table>
+  <caption><b>Location Bugs found by the Debugify</b></caption>
+  <tr>
+      <th>File</th>
+    <th>LLVM Pass Name</th>
+    <th>LLVM IR Instruction</th>
+    <th>Function Name</th>
+    <th>Basic Block Name</th>
+    <th>Action</th>
+  </tr>
+  </tr>
+    <tr>
+    <td>test.ll</td>
+    <td>no-name</td>
+    <td>extractvalue</td>
+    <td>fn</td>
+    <td>no-name</td>
+    <td>not-generate</td>
+    </tr>
+    <tr>
+    <td>test.ll</td>
+    <td>no-name</td>
+    <td>insertvalue</td>
+    <td>fn</td>
+    <td>no-name</td>
+    <td>not-generate</td>
+    </tr>
+  <tr>
+</table>
+<br>
+<table>
+  <caption><b>Summary of Location Bugs</b></caption>
+  <tr>
+      <th>LLVM Pass Name</th>
+    <th>Number of bugs</th>
+  </tr>
+    <tr>
+    <td>no-name</td>
+    <td>8</td>
+    </tr>
+  <tr>
+</table>
+<br>
+<br>
+<table>
+  <caption><b>SP Bugs found by the Debugify</b></caption>
+  <tr>
+      <th>File</th>
+    <th>LLVM Pass Name</th>
+    <th>Function Name</th>
+    <th>Action</th>
+  </tr>
+<tr>
+        <td colspan='4'> No bugs found </td>
+      </tr>
+    </table>
+<br>
+<table>
+  <caption><b>Summary of SP Bugs</b></caption>
+  <tr>
+      <th>LLVM Pass Name</th>
+    <th>Number of bugs</th>
+  </tr>
+  <tr>
+<tr>
+        <td colspan='2'> No bugs found </td>
+      </tr>
+    </table>
+<br>
+<br>
+<table>
+  <caption><b>Variable Location Bugs found by the Debugify</b></caption>
+  <tr>
+      <th>File</th>
+    <th>LLVM Pass Name</th>
+    <th>Variable</th>
+    <th>Function</th>
+    <th>Action</th>
+  </tr>
+<tr>
+        <td colspan='4'> No bugs found </td>
+      </tr>
+    </table>
+<br>
+<table>
+  <caption><b>Summary of Variable Location Bugs</b></caption>
+  <tr>
+      <th>LLVM Pass Name</th>
+    <th>Number of bugs</th>
+  </tr>
+  <tr>
+<tr>
+        <td colspan='2'> No bugs found </td>
+      </tr>
+    </table>
+</body>
+  </html>
\ No newline at end of file

diff --git a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
index 6fc1b69f7071..c861d3a6adf6 100644
--- a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
+++ b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
@@ -41,22 +41,6 @@
     <tr>
     <td>test.ll</td>
     <td>no-name</td>
-    <td>extractvalue</td>
-    <td>fn</td>
-    <td>no-name</td>
-    <td>not-generate</td>
-    </tr>
-    <tr>
-    <td>test.ll</td>
-    <td>no-name</td>
-    <td>insertvalue</td>
-    <td>fn1</td>
-    <td>no-name</td>
-    <td>not-generate</td>
-    </tr>
-    <tr>
-    <td>test.ll</td>
-    <td>no-name</td>
     <td>insertvalue</td>
     <td>fn1</td>
     <td>no-name</td>
@@ -65,22 +49,6 @@
     <tr>
     <td>test.ll</td>
     <td>no-name</td>
-    <td>insertvalue</td>
-    <td>fn</td>
-    <td>no-name</td>
-    <td>not-generate</td>
-    </tr>
-    <tr>
-    <td>test.ll</td>
-    <td>no-name</td>
-    <td>extractvalue</td>
-    <td>fn1</td>
-    <td>no-name</td>
-    <td>not-generate</td>
-    </tr>
-    <tr>
-    <td>test.ll</td>
-    <td>no-name</td>
     <td>extractvalue</td>
     <td>fn1</td>
     <td>no-name</td>

diff --git a/llvm/test/tools/llvm-original-di-preservation/basic.test b/llvm/test/tools/llvm-original-di-preservation/basic.test
index 12292f209fe7..81f987aa221b 100644
--- a/llvm/test/tools/llvm-original-di-preservation/basic.test
+++ b/llvm/test/tools/llvm-original-di-preservation/basic.test
@@ -6,3 +6,8 @@ RUN: %llvm-original-di-preservation %p/Inputs/corrupted.json %t2.html | FileChec
 RUN: diff -w %p/Inputs/expected-skipped.html %t2.html
 CORRUPTED: Skipped lines: 3
 CORRUPTED: Skipped bugs: 1
+
+RUN: %llvm-original-di-preservation -compress %p/Inputs/sample.json %t3.html | FileCheck %s -check-prefix=COMPRESSED
+RUN: diff -w %p/Inputs/expected-compressed.html %t3.html
+COMPRESSED-NOT: Skipped lines:
+

diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py
index 73d7d4bf90cf..5b53e6ad3d67 100755
--- a/llvm/utils/llvm-original-di-preservation.py
+++ b/llvm/utils/llvm-original-di-preservation.py
@@ -17,17 +17,23 @@ def __init__(self, action, bb_name, fn_name, instr):
     self.bb_name = bb_name
     self.fn_name = fn_name
     self.instr = instr
+  def __str__(self):
+    return self.action + self.bb_name + self.fn_name + self.instr
 
 class DISPBug:
   def __init__(self, action, fn_name):
     self.action = action
     self.fn_name = fn_name
+  def __str__(self):
+    return self.action + self.fn_name
 
 class DIVarBug:
   def __init__(self, action, name, fn_name):
     self.action = action
     self.name = name
     self.fn_name = fn_name
+  def __str__(self):
+    return self.action + self.name + self.fn_name
 
 # Report the bugs in form of html.
 def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \
@@ -326,11 +332,12 @@ def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \
 
   print("The " + html_file + " generated.")
 
-# Read the JSON file.
-def get_json(file):
+# Read the JSON file in chunks.
+def get_json_chunk(file,start,size):
   json_parsed = None
   di_checker_data = []
   skipped_lines = 0
+  line = 0
 
   # The file contains json object per line.
   # An example of the line (formatted json):
@@ -354,6 +361,11 @@ def get_json(file):
   #}
   with open(file) as json_objects_file:
     for json_object_line in json_objects_file:
+      line += 1
+      if line < start:
+        continue
+      if line >= start+size:
+        break
       try:
         json_object = loads(json_object_line)
       except:
@@ -361,12 +373,13 @@ def get_json(file):
       else:
         di_checker_data.append(json_object)
 
-  return (di_checker_data, skipped_lines)
+  return (di_checker_data, skipped_lines, line)
 
 # Parse the program arguments.
 def parse_program_args(parser):
   parser.add_argument("file_name", type=str, help="json file to process")
   parser.add_argument("html_file", type=str, help="html file to output data")
+  parser.add_argument("-compress", action="store_true", help="create reduced html report")
 
   return parser.parse_args()
 
@@ -378,8 +391,6 @@ def Main():
     print ("error: The output file must be '.html'.")
     sys.exit(1)
 
-  (debug_info_bugs, skipped_lines) = get_json(opts.file_name)
-
   # Use the defaultdict in order to make multidim dicts.
   di_location_bugs = defaultdict(lambda: defaultdict(dict))
   di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
@@ -390,81 +401,132 @@ def Main():
   di_sp_bugs_summary = OrderedDict()
   di_var_bugs_summary = OrderedDict()
 
+  # Compress similar bugs.
+  # DILocBugs with same pass & instruction name.
+  di_loc_pass_instr_set = set()
+  # DISPBugs with same pass & function name.
+  di_sp_pass_fn_set = set()
+  # DIVarBugs with same pass & variable name.
+  di_var_pass_var_set = set()
+
+  start_line = 0
+  chunk_size = 1000000
+  end_line = chunk_size - 1
+  skipped_lines = 0
   skipped_bugs = 0
-  # Map the bugs into the file-pass pairs.
-  for bugs_per_pass in debug_info_bugs:
-    try:
-      bugs_file = bugs_per_pass["file"]
-      bugs_pass = bugs_per_pass["pass"]
-      bugs = bugs_per_pass["bugs"][0]
-    except:
-      skipped_lines += 1
-      continue
-
-    di_loc_bugs = []
-    di_sp_bugs = []
-    di_var_bugs = []
-
-    for bug in bugs:
+  # Process each chunk of 1 million JSON lines.
+  while True:
+    if start_line > end_line:
+      break
+    (debug_info_bugs, skipped, end_line) = get_json_chunk(opts.file_name,start_line,chunk_size)
+    start_line += chunk_size
+    skipped_lines += skipped
+
+    # Map the bugs into the file-pass pairs.
+    for bugs_per_pass in debug_info_bugs:
       try:
-        bugs_metadata = bug["metadata"]
+        bugs_file = bugs_per_pass["file"]
+        bugs_pass = bugs_per_pass["pass"]
+        bugs = bugs_per_pass["bugs"][0]
       except:
-        skipped_bugs += 1
+        skipped_lines += 1
         continue
 
-      if bugs_metadata == "DILocation":
-        try:
-          action = bug["action"]
-          bb_name = bug["bb-name"]
-          fn_name = bug["fn-name"]
-          instr = bug["instr"]
-        except:
-          skipped_bugs += 1
-          continue
-        di_loc_bugs.append(DILocBug(action, bb_name, fn_name, instr))
+      di_loc_bugs = []
+      di_sp_bugs = []
+      di_var_bugs = []
 
-        # Fill the summary dict.
-        if bugs_pass in di_location_bugs_summary:
-          di_location_bugs_summary[bugs_pass] += 1
-        else:
-          di_location_bugs_summary[bugs_pass] = 1
-      elif bugs_metadata == "DISubprogram":
+      # Omit duplicated bugs.
+      di_loc_set = set()
+      di_sp_set = set()
+      di_var_set = set()
+      for bug in bugs:
         try:
-          action = bug["action"]
-          name = bug["name"]
+          bugs_metadata = bug["metadata"]
         except:
           skipped_bugs += 1
           continue
-        di_sp_bugs.append(DISPBug(action, name))
 
-        # Fill the summary dict.
-        if bugs_pass in di_sp_bugs_summary:
-          di_sp_bugs_summary[bugs_pass] += 1
+        if bugs_metadata == "DILocation":
+          try:
+            action = bug["action"]
+            bb_name = bug["bb-name"]
+            fn_name = bug["fn-name"]
+            instr = bug["instr"]
+          except:
+            skipped_bugs += 1
+            continue
+          di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
+          if not str(di_loc_bug) in di_loc_set:
+            di_loc_set.add(str(di_loc_bug))
+            if opts.compress:
+              pass_instr = bugs_pass + instr
+              if not pass_instr in di_loc_pass_instr_set:
+                di_loc_pass_instr_set.add(pass_instr)
+                di_loc_bugs.append(di_loc_bug)
+            else:
+              di_loc_bugs.append(di_loc_bug)
+
+          # Fill the summary dict.
+          if bugs_pass in di_location_bugs_summary:
+            di_location_bugs_summary[bugs_pass] += 1
+          else:
+            di_location_bugs_summary[bugs_pass] = 1
+        elif bugs_metadata == "DISubprogram":
+          try:
+            action = bug["action"]
+            name = bug["name"]
+          except:
+            skipped_bugs += 1
+            continue
+          di_sp_bug = DISPBug(action, name)
+          if not str(di_sp_bug) in di_sp_set:
+            di_sp_set.add(str(di_sp_bug))
+            if opts.compress:
+              pass_fn = bugs_pass + name
+              if not pass_fn in di_sp_pass_fn_set:
+                di_sp_pass_fn_set.add(pass_fn)
+                di_sp_bugs.append(di_sp_bug)
+            else:
+              di_sp_bugs.append(di_sp_bug)
+
+          # Fill the summary dict.
+          if bugs_pass in di_sp_bugs_summary:
+            di_sp_bugs_summary[bugs_pass] += 1
+          else:
+            di_sp_bugs_summary[bugs_pass] = 1
+        elif bugs_metadata == "dbg-var-intrinsic":
+          try:
+            action = bug["action"]
+            fn_name = bug["fn-name"]
+            name = bug["name"]
+          except:
+            skipped_bugs += 1
+            continue
+          di_var_bug = DIVarBug(action, name, fn_name)
+          if not str(di_var_bug) in di_var_set:
+            di_var_set.add(str(di_var_bug))
+            if opts.compress:
+              pass_var = bugs_pass + name
+              if not pass_var in di_var_pass_var_set:
+                di_var_pass_var_set.add(pass_var)
+                di_var_bugs.append(di_var_bug)
+            else:
+              di_var_bugs.append(di_var_bug)
+
+          # Fill the summary dict.
+          if bugs_pass in di_var_bugs_summary:
+            di_var_bugs_summary[bugs_pass] += 1
+          else:
+            di_var_bugs_summary[bugs_pass] = 1
         else:
-          di_sp_bugs_summary[bugs_pass] = 1
-      elif bugs_metadata == "dbg-var-intrinsic":
-        try:
-          action = bug["action"]
-          fn_name = bug["fn-name"]
-          name = bug["name"]
-        except:
+          # Unsupported metadata.
           skipped_bugs += 1
           continue
-        di_var_bugs.append(DIVarBug(action, name, fn_name))
-
-        # Fill the summary dict.
-        if bugs_pass in di_var_bugs_summary:
-          di_var_bugs_summary[bugs_pass] += 1
-        else:
-          di_var_bugs_summary[bugs_pass] = 1
-      else:
-        # Unsupported metadata.
-        skipped_bugs += 1
-        continue
 
-    di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
-    di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
-    di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs
+      di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
+      di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
+      di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs
 
   generate_html_report(di_location_bugs, di_subprogram_bugs, di_variable_bugs, \
                        di_location_bugs_summary, di_sp_bugs_summary, \


        


More information about the llvm-commits mailing list