r363898 - [analyzer] exploded-graph-rewriter: Implement a --diff mode.

Artem Dergachev via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 19 16:34:00 PDT 2019


Author: dergachev
Date: Wed Jun 19 16:33:59 2019
New Revision: 363898

URL: http://llvm.org/viewvc/llvm-project?rev=363898&view=rev
Log:
[analyzer] exploded-graph-rewriter: Implement a --diff mode.

In this mode the tool would avoid duplicating the contents of the
program state on every node, replacing them with a diff-like dump
of changes that happened on that node.

This is useful because most of the time we only interested in whether
the effect of the statement was modeled correctly. A diffed graph would
also be much faster to load and navigate, being much smaller than
the original graph.

The diffs are computed "semantically" as opposed to plain text diffs.
I.e., the diff algorithm is hand-crafted separately for every state trait,
taking the underlying data structures into account. This is especially nice
for Environment because textual diffs would have been terrible.
On the other hand, it requires some boilerplate to implement.

Differential Revision: https://reviews.llvm.org/D62761

Added:
    cfe/trunk/test/Analysis/exploded-graph-rewriter/environment_diff.dot
    cfe/trunk/test/Analysis/exploded-graph-rewriter/store_diff.dot
Modified:
    cfe/trunk/test/Analysis/exploded-graph-rewriter/program_points.dot
    cfe/trunk/test/Analysis/exploded-graph-rewriter/store.dot
    cfe/trunk/utils/analyzer/exploded-graph-rewriter.py

Added: cfe/trunk/test/Analysis/exploded-graph-rewriter/environment_diff.dot
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/exploded-graph-rewriter/environment_diff.dot?rev=363898&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/exploded-graph-rewriter/environment_diff.dot (added)
+++ cfe/trunk/test/Analysis/exploded-graph-rewriter/environment_diff.dot Wed Jun 19 16:33:59 2019
@@ -0,0 +1,110 @@
+// RUN: %exploded_graph_rewriter -d %s | FileCheck %s
+
+// FIXME: Substitution doesn't seem to work on Windows.
+// UNSUPPORTED: system-windows
+
+// No diffs on the first node, nothing to check.
+Node0x1 [shape=record,label=
+ "{
+    { "node_id": 1,
+      "pointer": "0x1",
+      "state_id": 2,
+      "program_points": [],
+      "program_state": {
+        "store": null,
+        "environment": [
+          {
+            "location_context": "#0 Call",
+            "lctx_id": 3,
+            "calling": "foo",
+            "call_line": 4,
+            "items": [
+              {
+                "stmt_id": 5,
+                "pretty": "bar()",
+                "value": "Unknown"
+              }
+            ]
+          }
+        ]
+      }
+    }
+\l}"];
+
+Node0x1 -> Node0x6;
+
+// CHECK: Node0x6 [
+// CHECK-SAME: <tr>
+// CHECK-SAME:   <td><font color="red">-</font></td>
+// CHECK-SAME:   <td align="left"><i>S5</i></td>
+// CHECK-SAME:   <td align="left">bar()</td>
+// CHECK-SAME:   <td align="left">Unknown</td>
+// CHECK-SAME: </tr>
+// CHECK-SAME: <tr>
+// CHECK-SAME:   <td><font color="forestgreen">+</font></td>
+// CHECK-SAME:   <td align="left"><i>S9</i></td>
+// CHECK-SAME:   <td align="left">baz()</td>
+// CHECK-SAME:   <td align="left">Undefined</td>
+// CHECK-SAME: </tr>
+Node0x6 [shape=record,label=
+ "{
+    { "node_id": 6,
+      "pointer": "0x6",
+      "state_id": 7,
+      "program_points": [],
+      "program_state": {
+        "store": null,
+        "environment": [
+          {
+            "location_context": "#0 Call",
+            "lctx_id": 3,
+            "calling": "foo",
+            "call_line": 4,
+            "items": [
+              {
+                "stmt_id": 9,
+                "pretty": "baz()",
+                "value": "Undefined"
+              }
+            ]
+          }
+        ]
+      }
+    }
+\l}"];
+
+Node0x6 -> Node0x9;
+// Make sure that the last node in the path is not diffed.
+// CHECK: Node0x9 [
+// CHECK-SAME: <tr>
+// CHECK-SAME:   <td></td>
+// CHECK-SAME:   <td align="left"><i>S9</i></td>
+// CHECK-SAME:   <td align="left">baz()</td>
+// CHECK-SAME:   <td align="left">Undefined</td>
+// CHECK-SAME: </tr>
+Node0x9 [shape=record,label=
+ "{
+    { "node_id": 9,
+      "pointer": "0x9",
+      "state_id": 7,
+      "program_points": [],
+      "program_state": {
+        "store": null,
+        "environment": [
+          {
+            "location_context": "#0 Call",
+            "lctx_id": 3,
+            "calling": "foo",
+            "call_line": 4,
+            "items": [
+              {
+                "stmt_id": 9,
+                "pretty": "baz()",
+                "value": "Undefined"
+              }
+            ]
+          }
+        ]
+      }
+    }
+\l}"];

Modified: cfe/trunk/test/Analysis/exploded-graph-rewriter/program_points.dot
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/exploded-graph-rewriter/program_points.dot?rev=363898&r1=363897&r2=363898&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/exploded-graph-rewriter/program_points.dot (original)
+++ cfe/trunk/test/Analysis/exploded-graph-rewriter/program_points.dot Wed Jun 19 16:33:59 2019
@@ -7,7 +7,6 @@
 // CHECK-SAME: <table border="0" align="left" width="0">
 // CHECK-SAME:   <tr>
 // CHECK-SAME:     <td width="0">
-// CHECK-SAME:       -
 // CHECK-SAME:     </td>
 // CHECK-SAME:     <td align="left" width="0">
 // CHECK-SAME:       <font color="gold3">Edge</font>

Modified: cfe/trunk/test/Analysis/exploded-graph-rewriter/store.dot
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/exploded-graph-rewriter/store.dot?rev=363898&r1=363897&r2=363898&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/exploded-graph-rewriter/store.dot (original)
+++ cfe/trunk/test/Analysis/exploded-graph-rewriter/store.dot Wed Jun 19 16:33:59 2019
@@ -31,6 +31,7 @@ Node0x1 [shape=record,label=
         "store": [
           {
             "cluster": "x",
+            "pointer": "0x3",
             "items": [
               {
                 "kind": "Default",

Added: cfe/trunk/test/Analysis/exploded-graph-rewriter/store_diff.dot
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/exploded-graph-rewriter/store_diff.dot?rev=363898&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/exploded-graph-rewriter/store_diff.dot (added)
+++ cfe/trunk/test/Analysis/exploded-graph-rewriter/store_diff.dot Wed Jun 19 16:33:59 2019
@@ -0,0 +1,82 @@
+// RUN: %exploded_graph_rewriter -d %s | FileCheck %s
+
+// FIXME: Substitution doesn't seem to work on Windows.
+// UNSUPPORTED: system-windows
+
+Node0x1 [shape=record,label=
+ "{
+    { "node_id": 1,
+      "pointer": "0x1",
+      "state_id": 2,
+      "program_points": [],
+      "program_state": {
+        "environment": null,
+        "store": [
+          {
+            "cluster": "x",
+            "pointer": "0x3",
+            "items": [
+              {
+                "kind": "Default",
+                "offset": 0,
+                "value": "Undefined"
+              }
+            ]
+          }
+        ]
+      }
+    }
+\l}"];
+
+Node0x1 -> Node0x4;
+
+// CHECK: Node0x4 [
+// CHECK-SAME: <tr>
+// CHECK-SAME:   <td><font color="red">-</font></td>
+// CHECK-SAME:   <td align="left">x</td><td align="left">0</td>
+// CHECK-SAME:   <td align="left">(<i>Default</i>)</td>
+// CHECK-SAME:   <td align="left">Undefined</td>
+// CHECK-SAME: </tr>
+// CHECK-SAME: <tr>
+// CHECK-SAME:   <td><font color="forestgreen">+</font></td>
+// CHECK-SAME:   <td align="left">x</td>
+// CHECK-SAME:   <td align="left">0</td>
+// CHECK-SAME:   <td align="left">(<i>Default</i>)</td>
+// CHECK-SAME:   <td align="left">Unknown</td>
+// CHECK-SAME: </tr>
+Node0x4 [shape=record,label=
+ "{
+    { "node_id": 4,
+      "pointer": "0x4",
+      "state_id": 5,
+      "program_points": [],
+      "program_state": {
+        "environment": null,
+        "store": [
+          {
+            "cluster": "x",
+            "pointer": "0x3",
+            "items": [
+              {
+                "kind": "Default",
+                "offset": 0,
+                "value": "Unknown"
+              }
+            ]
+          }
+        ]
+      }
+    }
+\l}"];
+
+Node0x4 -> Node0x6;
+
+Node0x6 [shape=record,label=
+ "{
+    { "node_id": 6,
+      "pointer": "0x6",
+      "state_id": 7,
+      "program_points": [],
+      "program_state": null
+    }
+\l}"];

Modified: cfe/trunk/utils/analyzer/exploded-graph-rewriter.py
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/analyzer/exploded-graph-rewriter.py?rev=363898&r1=363897&r2=363898&view=diff
==============================================================================
--- cfe/trunk/utils/analyzer/exploded-graph-rewriter.py (original)
+++ cfe/trunk/utils/analyzer/exploded-graph-rewriter.py Wed Jun 19 16:33:59 2019
@@ -18,6 +18,13 @@ import logging
 import re
 
 
+# A helper function for finding the difference between two dictionaries.
+def diff_dicts(curr, prev):
+    removed = [k for k in prev if k not in curr or curr[k] != prev[k]]
+    added = [k for k in curr if k not in prev or curr[k] != prev[k]]
+    return (removed, added)
+
+
 # A deserialized source location.
 class SourceLocation(object):
     def __init__(self, json_loc):
@@ -47,13 +54,21 @@ class ProgramPoint(object):
             self.block_id = json_pp['block_id']
 
 
-# A value of a single expression in a deserialized Environment.
-class EnvironmentBinding(object):
-    def __init__(self, json_eb):
-        super(EnvironmentBinding, self).__init__()
-        self.stmt_id = json_eb['stmt_id']
-        self.pretty = json_eb['pretty']
-        self.value = json_eb['value']
+# A single expression acting as a key in a deserialized Environment.
+class EnvironmentBindingKey(object):
+    def __init__(self, json_ek):
+        super(EnvironmentBindingKey, self).__init__()
+        self.stmt_id = json_ek['stmt_id']
+        self.pretty = json_ek['pretty']
+
+    def _key(self):
+        return self.stmt_id
+
+    def __eq__(self, other):
+        return self._key() == other._key()
+
+    def __hash__(self):
+        return hash(self._key())
 
 
 # Deserialized description of a location context.
@@ -65,6 +80,15 @@ class LocationContext(object):
         self.decl = json_frame['calling']
         self.line = json_frame['call_line']
 
+    def _key(self):
+        return self.lctx_id
+
+    def __eq__(self, other):
+        return self._key() == other._key()
+
+    def __hash__(self):
+        return hash(self._key())
+
 
 # A group of deserialized Environment bindings that correspond to a specific
 # location context.
@@ -72,8 +96,17 @@ class EnvironmentFrame(object):
     def __init__(self, json_frame):
         super(EnvironmentFrame, self).__init__()
         self.location_context = LocationContext(json_frame)
-        self.bindings = [EnvironmentBinding(b) for b in json_frame['items']] \
-            if json_frame['items'] is not None else []
+        self.bindings = collections.OrderedDict(
+            [(EnvironmentBindingKey(b),
+              b['value']) for b in json_frame['items']]
+            if json_frame['items'] is not None else [])
+
+    def diff_bindings(self, prev):
+        return diff_dicts(self.bindings, prev.bindings)
+
+    def is_different(self, prev):
+        removed, added = self.diff_bindings(prev)
+        return len(removed) != 0 or len(added) != 0
 
 
 # A deserialized Environment.
@@ -82,14 +115,46 @@ class Environment(object):
         super(Environment, self).__init__()
         self.frames = [EnvironmentFrame(f) for f in json_e]
 
+    def diff_frames(self, prev):
+        # TODO: It's difficult to display a good diff when frame numbers shift.
+        if len(self.frames) != len(prev.frames):
+            return None
+
+        updated = []
+        for i in range(len(self.frames)):
+            f = self.frames[i]
+            prev_f = prev.frames[i]
+            if f.location_context == prev_f.location_context:
+                if f.is_different(prev_f):
+                    updated.append(i)
+            else:
+                # We have the whole frame replaced with another frame.
+                # TODO: Produce a nice diff.
+                return None
+
+        # TODO: Add support for added/removed.
+        return updated
+
+    def is_different(self, prev):
+        updated = self.diff_frames(prev)
+        return updated is None or len(updated) > 0
+
+
+# A single binding key in a deserialized RegionStore cluster.
+class StoreBindingKey(object):
+    def __init__(self, json_sk):
+        super(StoreBindingKey, self).__init__()
+        self.kind = json_sk['kind']
+        self.offset = json_sk['offset']
+
+    def _key(self):
+        return (self.kind, self.offset)
 
-# A single binding in a deserialized RegionStore cluster.
-class StoreBinding(object):
-    def __init__(self, json_sb):
-        super(StoreBinding, self).__init__()
-        self.kind = json_sb['kind']
-        self.offset = json_sb['offset']
-        self.value = json_sb['value']
+    def __eq__(self, other):
+        return self._key() == other._key()
+
+    def __hash__(self):
+        return hash(self._key())
 
 
 # A single cluster of the deserialized RegionStore.
@@ -97,14 +162,34 @@ class StoreCluster(object):
     def __init__(self, json_sc):
         super(StoreCluster, self).__init__()
         self.base_region = json_sc['cluster']
-        self.bindings = [StoreBinding(b) for b in json_sc['items']]
+        self.bindings = collections.OrderedDict(
+            [(StoreBindingKey(b), b['value']) for b in json_sc['items']])
+
+    def diff_bindings(self, prev):
+        return diff_dicts(self.bindings, prev.bindings)
+
+    def is_different(self, prev):
+        removed, added = self.diff_bindings(prev)
+        return len(removed) != 0 or len(added) != 0
 
 
 # A deserialized RegionStore.
 class Store(object):
     def __init__(self, json_s):
         super(Store, self).__init__()
-        self.clusters = [StoreCluster(c) for c in json_s]
+        self.clusters = collections.OrderedDict(
+            [(c['pointer'], StoreCluster(c)) for c in json_s])
+
+    def diff_clusters(self, prev):
+        removed = [k for k in prev.clusters if k not in self.clusters]
+        added = [k for k in self.clusters if k not in prev.clusters]
+        updated = [k for k in prev.clusters if k in self.clusters
+                   and prev.clusters[k].is_different(self.clusters[k])]
+        return (removed, added, updated)
+
+    def is_different(self, prev):
+        removed, added, updated = self.diff_clusters(prev)
+        return len(removed) != 0 or len(added) != 0 or len(updated) != 0
 
 
 # A deserialized program state.
@@ -213,8 +298,9 @@ class ExplodedGraph(object):
 # A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
 # syntax highlighing.
 class DotDumpVisitor(object):
-    def __init__(self):
+    def __init__(self, do_diffs):
         super(DotDumpVisitor, self).__init__()
+        self._do_diffs = do_diffs
 
     @staticmethod
     def _dump_raw(s):
@@ -230,6 +316,14 @@ class DotDumpVisitor(object):
                .replace('\\l', '<br />')
                .replace('|', ''), end='')
 
+    @staticmethod
+    def _diff_plus_minus(is_added):
+        if is_added is None:
+            return ''
+        if is_added:
+            return '<font color="forestgreen">+</font>'
+        return '<font color="red">-</font>'
+
     def visit_begin_graph(self, graph):
         self._graph = graph
         self._dump_raw('digraph "ExplodedGraph" {\n')
@@ -263,72 +357,128 @@ class DotDumpVisitor(object):
                            '<font color="%s">%s</font></td><td>%s</td></tr>'
                            % (color, p.stmt_kind, p.pretty))
         elif p.kind == 'Edge':
-            self._dump('<tr><td width="0">-</td>'
+            self._dump('<tr><td width="0"></td>'
                        '<td align="left" width="0">'
                        '<font color="%s">%s</font></td><td align="left">'
                        '[B%d] -\\> [B%d]</td></tr>'
                        % (color, p.kind, p.src_id, p.dst_id))
         else:
             # TODO: Print more stuff for other kinds of points.
-            self._dump('<tr><td width="0">-</td>'
+            self._dump('<tr><td width="0"></td>'
                        '<td align="left" width="0" colspan="2">'
                        '<font color="%s">%s</font></td></tr>'
                        % (color, p.kind))
 
-    def visit_environment(self, e):
+    def visit_environment(self, e, prev_e=None):
         self._dump('<table border="0">')
 
-        for f in e.frames:
-            self._dump('<tr><td align="left"><b>%s</b></td>'
+        def dump_location_context(lc, is_added=None):
+            self._dump('<tr><td>%s</td>'
+                       '<td align="left"><b>%s</b></td>'
                        '<td align="left"><font color="grey60">%s </font>'
                        '%s</td></tr>'
-                       % (f.location_context.caption,
-                          f.location_context.decl,
-                          ('(line %s)' % f.location_context.line)
-                          if f.location_context.line is not None else ''))
-            for b in f.bindings:
-                self._dump('<tr><td align="left"><i>S%s</i></td>'
-                           '<td align="left">%s</td>'
-                           '<td align="left">%s</td></tr>'
-                           % (b.stmt_id, b.pretty, b.value))
+                       % (self._diff_plus_minus(is_added),
+                          lc.caption, lc.decl,
+                          ('(line %s)' % lc.line) if lc.line is not None
+                          else ''))
+
+        def dump_binding(f, b, is_added=None):
+            self._dump('<tr><td>%s</td>'
+                       '<td align="left"><i>S%s</i></td>'
+                       '<td align="left">%s</td>'
+                       '<td align="left">%s</td></tr>'
+                       % (self._diff_plus_minus(is_added),
+                          b.stmt_id, b.pretty, f.bindings[b]))
+
+        frames_updated = e.diff_frames(prev_e) if prev_e is not None else None
+        if frames_updated:
+            for i in frames_updated:
+                f = e.frames[i]
+                prev_f = prev_e.frames[i]
+                dump_location_context(f.location_context)
+                bindings_removed, bindings_added = f.diff_bindings(prev_f)
+                for b in bindings_removed:
+                    dump_binding(prev_f, b, False)
+                for b in bindings_added:
+                    dump_binding(f, b, True)
+        else:
+            for f in e.frames:
+                dump_location_context(f.location_context)
+                for b in f.bindings:
+                    dump_binding(f, b)
 
         self._dump('</table>')
 
-    def visit_store(self, s):
+    def visit_store(self, s, prev_s=None):
         self._dump('<table border="0">')
 
-        for c in s.clusters:
-            for b in c.bindings:
-                self._dump('<tr><td align="left">%s</td>'
-                           '<td align="left">%s</td>'
-                           '<td align="left">%s</td>'
-                           '<td align="left">%s</td></tr>'
-                           % (c.base_region, b.offset,
-                              '(<i>Default</i>)' if b.kind == 'Default'
-                              else '',
-                              b.value))
+        def dump_binding(s, c, b, is_added=None):
+            self._dump('<tr><td>%s</td>'
+                       '<td align="left">%s</td>'
+                       '<td align="left">%s</td>'
+                       '<td align="left">%s</td>'
+                       '<td align="left">%s</td></tr>'
+                       % (self._diff_plus_minus(is_added),
+                          s.clusters[c].base_region, b.offset,
+                          '(<i>Default</i>)' if b.kind == 'Default'
+                          else '',
+                          s.clusters[c].bindings[b]))
+
+        if prev_s is not None:
+            clusters_removed, clusters_added, clusters_updated = \
+                s.diff_clusters(prev_s)
+            for c in clusters_removed:
+                for b in prev_s.clusters[c].bindings:
+                    dump_binding(prev_s, c, b, False)
+            for c in clusters_updated:
+                bindings_removed, bindings_added = \
+                    s.clusters[c].diff_bindings(prev_s.clusters[c])
+                for b in bindings_removed:
+                    dump_binding(prev_s, c, b, False)
+                for b in bindings_added:
+                    dump_binding(s, c, b, True)
+            for c in clusters_added:
+                for b in s.clusters[c].bindings:
+                    dump_binding(s, c, b, True)
+        else:
+            for c in s.clusters:
+                for b in s.clusters[c].bindings:
+                    dump_binding(s, c, b)
 
         self._dump('</table>')
 
-    def visit_state(self, s):
-        self._dump('<tr><td align="left">'
-                   '<b>Store: </b>')
+    def visit_state(self, s, prev_s):
+        # == Store ==
+        self._dump('<tr><td align="left"><b>Store: </b>')
         if s.store is None:
             self._dump('<i> Nothing!</i>')
         else:
-            self._dump('</td></tr>'
-                       '<tr><td align="left">')
-            self.visit_store(s.store)
+            if prev_s is not None and prev_s.store is not None:
+                if s.store.is_different(prev_s.store):
+                    self._dump('</td></tr><tr><td align="left">')
+                    self.visit_store(s.store, prev_s.store)
+                else:
+                    self._dump('<i> No changes!</i>')
+            else:
+                self._dump('</td></tr><tr><td align="left">')
+                self.visit_store(s.store)
+        self._dump('</td></tr><hr />')
 
-        self._dump('</td></tr><hr />'
-                   '<tr><td align="left">'
+        # == Environment ==
+        self._dump('<tr><td align="left">'
                    '<b>Environment: </b>')
         if s.environment is None:
             self._dump('<i> Nothing!</i>')
         else:
-            self._dump('</td></tr>'
-                       '<tr><td align="left">')
-            self.visit_environment(s.environment)
+            if prev_s is not None and prev_s.environment is not None:
+                if s.environment.is_different(prev_s.environment):
+                    self._dump('</td></tr><tr><td align="left">')
+                    self.visit_environment(s.environment, prev_s.environment)
+                else:
+                    self._dump('<i> No changes!</i>')
+            else:
+                self._dump('</td></tr><tr><td align="left">')
+                self.visit_environment(s.environment)
 
         self._dump('</td></tr>')
 
@@ -353,7 +503,14 @@ class DotDumpVisitor(object):
 
         if node.state is not None:
             self._dump('<hr />')
-            self.visit_state(node.state)
+            prev_s = None
+            # Do diffs only when we have a unique predecessor.
+            # Don't do diffs on the leaf nodes because they're
+            # the important ones.
+            if self._do_diffs and len(node.predecessors) == 1 \
+               and len(node.successors) > 0:
+                prev_s = self._graph.nodes[node.predecessors[0]].state
+            self.visit_state(node.state, prev_s)
         self._dump_raw('</table>>];\n')
 
     def visit_edge(self, pred, succ):
@@ -383,13 +540,13 @@ class Explorer(object):
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('filename', type=str)
-    parser.add_argument('-d', '--debug', action='store_const', dest='loglevel',
-                        const=logging.DEBUG, default=logging.WARNING,
-                        help='enable debug prints')
     parser.add_argument('-v', '--verbose', action='store_const',
-                        dest='loglevel', const=logging.INFO,
+                        dest='loglevel', const=logging.DEBUG,
                         default=logging.WARNING,
                         help='enable info prints')
+    parser.add_argument('-d', '--diff', action='store_const', dest='diff',
+                        const=True, default=False,
+                        help='display differences between states')
     args = parser.parse_args()
     logging.basicConfig(level=args.loglevel)
 
@@ -400,7 +557,7 @@ def main():
             graph.add_raw_line(raw_line)
 
     explorer = Explorer()
-    visitor = DotDumpVisitor()
+    visitor = DotDumpVisitor(args.diff)
     explorer.explore(graph, visitor)
 
 




More information about the cfe-commits mailing list