[libcxx-commits] [libcxx] 99382e4 - [libc++] Add utility to generate and display libc++'s header dependency graph.
Eric Fiselier via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Feb 15 15:47:27 PST 2020
Author: Eric Fiselier
Date: 2020-02-15T18:47:17-05:00
New Revision: 99382e450fb56158a56db48f547c6e897fcf74aa
URL: https://github.com/llvm/llvm-project/commit/99382e450fb56158a56db48f547c6e897fcf74aa
DIFF: https://github.com/llvm/llvm-project/commit/99382e450fb56158a56db48f547c6e897fcf74aa.diff
LOG: [libc++] Add utility to generate and display libc++'s header dependency
graph.
Added:
libcxx/utils/graph_header_deps.py
libcxx/utils/libcxx/graph.py
Modified:
libcxx/utils/libcxx/util.py
Removed:
################################################################################
diff --git a/libcxx/utils/graph_header_deps.py b/libcxx/utils/graph_header_deps.py
new file mode 100755
index 000000000000..b6f0a250ccef
--- /dev/null
+++ b/libcxx/utils/graph_header_deps.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+from argparse import ArgumentParser
+import os
+import shutil
+import sys
+import shlex
+import json
+import re
+import libcxx.graph as dot
+import libcxx.util
+
def print_and_exit(msg):
    """Write ``msg`` plus a trailing newline to stderr and exit with status 1."""
    sys.stderr.write(''.join((msg, '\n')))
    raise SystemExit(1)
+
def libcxx_include_path():
    """Return the absolute path of the ``include`` directory located one level
    above the directory that contains this script."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.dirname(script_dir)
    return os.path.join(parent_dir, 'include')
+
def get_libcxx_headers():
    """Return the full paths of the top-level header files in the include
    directory.

    A directory entry is kept when it is a regular file, has either no
    extension or a ``.h`` extension, and its name does not start with ``__``
    (``__config`` is the one exception that is kept).
    """
    include_dir = libcxx_include_path()

    def wanted(fname):
        # Only extension-less names and ``.h`` files are considered headers.
        ext = os.path.splitext(fname)[1]
        if ext not in ('', '.h'):
            return False
        # Names starting with a double underscore are skipped, except __config.
        return fname == '__config' or not fname.startswith('__')

    headers = []
    for fname in os.listdir(include_dir):
        full_path = os.path.join(include_dir, fname)
        if os.path.isfile(full_path) and wanted(fname):
            headers.append(full_path)
    return headers
+
+
def rename_headers_and_remove_test_root(graph):
    """Normalise every node label in ``graph`` and drop ``.tmp.cpp`` nodes.

    For each node the label is rewritten in place:
      * a label without a leading ``/`` gets one when the resulting absolute
        path exists on disk;
      * a label located under the include root has that root (and the
        following ``/``) stripped, leaving a path relative to it.

    Nodes whose (absolute) label ends in ``.tmp.cpp`` are removed from the
    graph afterwards -- presumably the temporary translation unit that was
    compiled to produce the graph.
    """
    include_root = libcxx_include_path()
    doomed = set()
    for node in graph.nodes:
        assert 'label' in node.attributes
        label = node.attributes['label']
        is_relative = not label.startswith('/')
        if is_relative and os.path.exists(os.path.join('/', label)):
            label = '/' + label
        if label.endswith('.tmp.cpp'):
            doomed.add(node)
        if label.startswith(include_root):
            label = label[len(include_root):]
            if label.startswith('/'):
                label = label[1:]
        node.attributes['label'] = label
    # Removal is deferred so the node set is not modified while iterating it.
    for node in doomed:
        graph.removeNode(node)
+
def remove_non_std_headers(graph):
    """Remove from ``graph`` every node whose label does not resolve to a path
    under the include root.

    ``os.path.join`` discards the root when the label is itself absolute, so
    the prefix test fails exactly for labels that are still absolute paths.
    """
    include_root = libcxx_include_path()

    def outside_root(node):
        candidate = os.path.join(include_root, node.attributes['label'])
        return not candidate.startswith(include_root)

    # Collect first, remove afterwards: the node set must not change while it
    # is being iterated.
    outsiders = set(node for node in graph.nodes if outside_root(node))
    for node in outsiders:
        graph.removeNode(node)
+
class DependencyCommand(object):
    """Runs the compiler once per header to produce a ``.dot`` include graph.

    The base compiler invocation is taken from the compilation-database entry
    for ``src/algorithm.cpp``, with its ``-o <file>`` and ``-c <file>``
    arguments removed.
    """

    def __init__(self, compile_commands, output_dir, new_std=None):
        """
        compile_commands -- path to a JSON compilation database.
        output_dir       -- existing directory that receives the ``.dot``
                            files; the process exits if it is not a directory.
        new_std          -- optional ``-std=...`` flag that replaces the one
                            found in the base command.
        """
        output_dir = os.path.abspath(output_dir)
        if not os.path.isdir(output_dir):
            print_and_exit('"%s" must point to a directory' % output_dir)
        self.output_dir = output_dir
        # Must be set before _get_base_command, which reads it.
        self.new_std = new_std
        self.cwd, self.base_cmd = self._get_base_command(compile_commands)

    def run_for_headers(self, header_list):
        """Compile ``#include <header>`` for each header in ``header_list``.

        Each run feeds a one-line translation unit on stdin and passes
        ``-dependency-dot <output_dir>/<header>.dot`` through to clang.
        Returns the list of ``.dot`` paths, in input order.  A failing
        compile terminates the process (see executeCommandOrDie).
        """
        outputs = []
        for header in header_list:
            header_name = os.path.basename(header)
            out = os.path.join(self.output_dir, '%s.dot' % header_name)
            outputs.append(out)
            cmd = self.base_cmd + [
                "-fsyntax-only", "-Xclang", "-dependency-dot",
                "-Xclang", out, '-xc++', '-']
            libcxx.util.executeCommandOrDie(
                cmd, cwd=self.cwd, input='#include <%s>\n\n' % header_name)
        return outputs

    def _get_base_command(self, command_file):
        """Return ``(working_directory, argv)`` for building algorithm.cpp.

        Reads the compilation database ``command_file`` and uses the first
        entry whose ``file`` ends in ``src/algorithm.cpp``.  Both forms of a
        database entry are accepted: a shell-quoted ``command`` string or an
        ``arguments`` list.  Raises ValueError if the entry has no ``-o`` or
        ``-c`` argument; exits the process if no entry matches.
        """
        with open(command_file, 'r') as f:
            commands = json.load(f)
        for entry in commands:
            if not entry['file'].endswith('src/algorithm.cpp'):
                continue
            if 'command' in entry:
                cmd = shlex.split(entry['command'])
            else:
                # The database format also allows a pre-split argument list.
                cmd = list(entry['arguments'])
            # Drop "-o <output>" and "-c <input>": the flag and the value
            # that follows it.
            for flag in ('-o', '-c'):
                pos = cmd.index(flag)
                del cmd[pos:pos + 2]
            if self.new_std is not None:
                # Replace the first -std= flag (if any) with the requested
                # one; nothing is appended when the command has no -std=.
                for pos, arg in enumerate(cmd):
                    if arg.startswith('-std='):
                        del cmd[pos]
                        cmd.append(self.new_std)
                        break
            return entry['directory'], cmd
        print_and_exit("failed to find command to build algorithm.cpp")
+
def post_process_outputs(outputs, libcxx_only):
    """Load, clean up and rewrite each ``.dot`` file listed in ``outputs``.

    Every file is parsed into a graph, its labels are normalised and, when
    ``libcxx_only`` is true, nodes outside the include root are removed.  The
    cleaned graph is written back over the same file.  Returns the list of
    cleaned graphs in input order.
    """
    graphs = []
    for dot_path in outputs:
        graph = dot.DirectedGraph.fromDotFile(dot_path)
        rename_headers_and_remove_test_root(graph)
        if libcxx_only:
            remove_non_std_headers(graph)
        graph.toDotFile(dot_path)
        graphs.append(graph)
    return graphs
+
def build_canonical_names(graphs):
    """Map every distinct node label found in ``graphs`` to a unique id.

    Ids have the form ``header_<N>`` where ``N`` counts up from 0 in the order
    labels are first encountered.  Returns a ``{label: id}`` dict.
    """
    canonical_names = {}
    for graph in graphs:
        for node in graph.nodes:
            label = node.attributes['label']
            if label in canonical_names:
                continue
            # One id is handed out per new label, so the current size of the
            # mapping is the next free index.
            canonical_names[label] = 'header_%d' % len(canonical_names)
    return canonical_names
+
+
+
class CanonicalGraphBuilder(object):
    """Merges several per-header graphs into one graph named ``all_headers``.

    Nodes from different graphs are identified by their ``label`` attribute;
    each distinct label becomes a single node in the merged graph.
    """

    def __init__(self, graphs):
        # Materialise once and reuse the list: if ``graphs`` is a one-shot
        # iterator, passing it on after list() would yield nothing.
        self.graphs = list(graphs)
        self.canonical_names = build_canonical_names(self.graphs)

    def build(self):
        """Create, populate and return the merged graph.

        The result is also stored on ``self.canonical``.
        """
        self.canonical = dot.DirectedGraph('all_headers')
        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for label, node_id in self.canonical_names.items():
            node = dot.Node(node_id, edges=[],
                            attributes={'shape': 'box', 'label': label})
            self.canonical.addNode(node)
        for g in self.graphs:
            self._merge_graph(g)
        return self.canonical

    def _merge_graph(self, g):
        """Add every edge of ``g`` to the merged graph, translated to the
        canonical node ids of its endpoints."""
        for n in g.nodes:
            # canonical_names already maps label -> merged node id, so no scan
            # over the merged graph's nodes is needed.
            source_id = self.canonical_names[n.attributes['label']]
            for e in n.edges:
                target_id = self.canonical_names[e.attributes['label']]
                self.canonical.addEdge(source_id, target_id)
+
+
def main():
    """Command-line entry point.

    Generates one ``.dot`` graph per libc++ header (or, with ``--no-compile``,
    reloads the graphs already present in the output directory), merges them
    into ``all_headers.dot`` in that directory, and prints any include cycles
    found in the individual or merged graphs.
    """
    parser = ArgumentParser(
        description="Generate a graph of libc++ header dependencies")
    parser.add_argument(
        '-v', '--verbose', dest='verbose', action='store_true', default=False)
    # The option is required and names a directory (DependencyCommand rejects
    # anything else), so the help text says so.
    parser.add_argument(
        '-o', '--output', dest='output', required=True,
        help='The output directory (must already exist). One .dot file per '
             'header and the merged all_headers.dot are written into it',
        type=str, action='store')
    parser.add_argument(
        '--no-compile', dest='no_compile', action='store_true', default=False)
    parser.add_argument(
        '--libcxx-only', dest='libcxx_only', action='store_true', default=False)
    parser.add_argument(
        'compile_commands', metavar='compile-commands-file',
        help='the compile commands database')

    args = parser.parse_args()
    builder = DependencyCommand(args.compile_commands, args.output, new_std='-std=c++2a')
    if not args.no_compile:
        outputs = builder.run_for_headers(get_libcxx_headers())
        graphs = post_process_outputs(outputs, args.libcxx_only)
    else:
        # Reload previously generated graphs.  Only .dot files are parsed
        # (other files in the directory would abort the parser), the merged
        # graph from an earlier run is skipped, and the listing is sorted so
        # the result does not depend on directory order.
        outputs = [os.path.join(args.output, l)
                   for l in sorted(os.listdir(args.output))
                   if l.endswith('.dot') and not l.endswith('all_headers.dot')]
        graphs = [dot.DirectedGraph.fromDotFile(o) for o in outputs]

    canon = CanonicalGraphBuilder(graphs).build()
    canon.toDotFile(os.path.join(args.output, 'all_headers.dot'))
    all_graphs = graphs + [canon]

    found_cycles = False
    for g in all_graphs:
        all_cycles = dot.CycleFinder(g).findCyclesInGraph()
        if all_cycles:
            found_cycles = True
            print("cycle in graph %s" % g.name)
            for start, path in all_cycles:
                print("Cycle for %s = %s" % (start, path))
    if not found_cycles:
        print("No cycles found")
+
+
+
+if __name__ == '__main__':
+ main()
diff --git a/libcxx/utils/libcxx/graph.py b/libcxx/utils/libcxx/graph.py
new file mode 100644
index 000000000000..681d3ad2568f
--- /dev/null
+++ b/libcxx/utils/libcxx/graph.py
@@ -0,0 +1,298 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+import platform
+import os
+from collections import defaultdict
+import re
+import libcxx.util
+
+
class DotEmitter(object):
    """Collects node and edge statements and renders them as DOT text."""

    def __init__(self, name):
        self.name = name
        self.node_strings = {}   # node id -> rendered node statement
        self.edge_strings = []   # rendered edge statements, in insertion order

    def addNode(self, node):
        """Record the statement for ``node`` (``id [ k="v", ... ];``).

        Each node id may be added only once.
        """
        assert node.id not in self.node_strings
        res = str(node.id)
        if node.attributes:
            # .items() works on both Python 2 and 3.
            attr_strs = ['%s="%s"' % (k, v) for k, v in node.attributes.items()]
            res += ' [ %s ]' % (', '.join(attr_strs))
        res += ';'
        self.node_strings[node.id] = res

    def addEdge(self, n1, n2):
        """Record a directed edge statement from ``n1`` to ``n2``."""
        self.edge_strings.append('%s -> %s;' % (n1.id, n2.id))

    def node_key(self, n):
        """Check that the id of ``n`` matches ``\\w*\\d+`` and return it.

        The original check handed the regex text to ``str.startswith`` and so
        could never succeed.  NOTE(review): nothing in this file calls this
        method; returning the id (usable as a sort key) is an assumption
        about the intent.
        """
        id = n.id
        assert re.match(r'\w*\d+', str(id))
        return id

    def emit(self):
        """Return the DOT source: node statements sorted by node id, followed
        by the edge statements in insertion order."""
        # sorted() replaces keys().sort(), which fails on Python 3 where
        # keys() is a view without a sort method.
        node_definitions_list = [self.node_strings[k]
                                 for k in sorted(self.node_strings)]
        node_definitions = '\n '.join(node_definitions_list)
        edge_list = '\n '.join(self.edge_strings)
        template = 'digraph "{name}" {{\n {node_definitions}\n {edge_list}\n}}'
        return template.format(name=self.name,
                               node_definitions=node_definitions,
                               edge_list=edge_list)
+
+
class DotReader(object):
    """Parser for the small DOT subset written by DotEmitter.

    Accepted input: a ``digraph "name" {`` line, then node definitions
    (``id [ k="v", ... ];``) and edges (``a -> b;``), then a closing ``}``.
    """

    # Compiled once instead of on every call.  Raw strings keep regex escapes
    # such as \s and \w from being read as (invalid) string escapes.
    _EDGE_RE = re.compile(r'^\s*(\w+)\s+->\s+(\w+);\s*$')
    _ATTRIBUTE_RE = re.compile(r'\s*(\w+)="([^"]+)"')
    _NODE_DEFINITION_RE = re.compile(r'^\s*(\w+)\s+\[([^\]]+)\]\s*;\s*$')
    _INTRODUCER_RE = re.compile(r'^\s*digraph "([^"]+)"\s+{\s*$')
    _CLOSER_RE = re.compile(r'^\s*}\s*$')

    def __init__(self):
        self.graph = DirectedGraph(None)

    def abortParse(self, msg="bad input"):
        """Abort parsing by raising ``Exception(msg)``."""
        raise Exception(msg)

    def parse(self, data):
        """Parse DOT text ``data`` and return the populated graph.

        Blank lines are ignored; anything after the closing ``}`` is not
        examined.  Raises Exception on malformed input, including input
        with no content at all.
        """
        lines = [l.strip() for l in data.splitlines() if l.strip()]
        maxIdx = len(lines)
        # Empty input has no introducer line to look at.
        if not lines or not self.parseIntroducer(lines[0]):
            self.abortParse('failed to parse introducer')
        idx = 1
        # Consume node/edge statements until the first line that is neither.
        while idx < maxIdx and (self.parseNodeDefinition(lines[idx])
                                or self.parseEdgeDefinition(lines[idx])):
            idx += 1
        if idx == maxIdx or not self.parseCloser(lines[idx]):
            self.abortParse("no closing } found")
        return self.graph

    def parseEdgeDefinition(self, l):
        """Parse ``a -> b;`` and add that edge; return False on no match."""
        m = self._EDGE_RE.match(l)
        if not m:
            return False
        self.graph.addEdge(m.group(1), m.group(2))
        return True

    def parseAttributes(self, raw_str):
        """Parse ``k="v", k2="v2"`` into a dict.

        The text is scanned attribute by attribute rather than split on
        commas, so a quoted value may itself contain a comma.  Empty items
        are skipped; an item that is not ``name="value"`` aborts the parse.
        """
        attribute_dict = {}
        pos = 0
        end = len(raw_str)
        while pos < end:
            m = self._ATTRIBUTE_RE.match(raw_str, pos)
            if m:
                attribute_dict[m.group(1)] = m.group(2)
                pos = m.end()
            # The next separator is searched for only after the closing
            # quote, so commas inside the value are not separators.
            comma = raw_str.find(',', pos)
            item_end = end if comma == -1 else comma
            if not m:
                item = raw_str[pos:item_end].strip()
                if item:
                    self.abortParse('Bad attribute "%s"' % item)
            pos = item_end + 1
        return attribute_dict

    def parseNodeDefinition(self, l):
        """Parse ``id [ attrs ];`` and add that node; return False on no
        match."""
        m = self._NODE_DEFINITION_RE.match(l)
        if not m:
            return False
        attributes = self.parseAttributes(m.group(2))
        self.graph.addNode(Node(m.group(1), edges=[], attributes=attributes))
        return True

    def parseIntroducer(self, l):
        """Parse ``digraph "name" {`` and set the graph name; return False on
        no match."""
        m = self._INTRODUCER_RE.match(l)
        if not m:
            return False
        self.graph.setName(m.group(1))
        return True

    def parseCloser(self, l):
        """Return True when ``l`` is just the closing ``}``."""
        return self._CLOSER_RE.match(l) is not None
+
class Node(object):
    """A graph node identified by ``id``.

    Equality and hashing use ``id`` only, and a node also compares equal to a
    plain string holding the same id.  ``edges`` is the set of destination
    nodes; ``attributes`` is a dict of DOT attributes.
    """

    def __init__(self, id, edges=None, attributes=None):
        # None replaces the former mutable default arguments; the arguments
        # are copied, so the caller's containers are never shared.
        self.id = id
        self.edges = set(edges) if edges is not None else set()
        self.attributes = dict(attributes) if attributes is not None else {}

    def addEdge(self, dest):
        """Add an edge from this node to ``dest``."""
        self.edges.add(dest)

    def __eq__(self, another):
        if isinstance(another, str):
            return another == self.id
        return hasattr(another, 'id') and self.id == another.id

    def __ne__(self, another):
        # Python 2 does not derive != from __eq__; define it explicitly so
        # both operators agree on every interpreter version.
        return not self.__eq__(another)

    def __hash__(self):
        return hash(self.id)

    def __str__(self):
        """Return the ``label`` attribute, or the id when there is none."""
        if 'label' in self.attributes:
            return self.attributes["label"]
        return str(self.id)

    def __repr__(self):
        return self.__str__()
+
class DirectedGraph(object):
    """A named set of nodes; the edges are stored on the nodes themselves."""

    def __init__(self, name=None, nodes=None):
        self.name = name
        self.nodes = set(nodes) if nodes is not None else set()

    def setName(self, n):
        """Set the graph name."""
        self.name = n

    def _getNode(self, n_or_id):
        """Return the argument itself if it is a Node, else look it up by id."""
        if not isinstance(n_or_id, Node):
            return self.getNode(n_or_id)
        return n_or_id

    def getNode(self, str_id):
        """Return the node in this graph equal to ``str_id`` (an id string or
        a Node), or None when there is none."""
        assert isinstance(str_id, str) or isinstance(str_id, Node)
        for candidate in self.nodes:
            if candidate == str_id:
                return candidate
        return None

    def getNodeByLabel(self, l):
        """Return the node whose ``label`` attribute equals ``l``, or None.

        Asserts that at most one node carries that label.
        """
        match = None
        for candidate in self.nodes:
            if candidate.attributes['label'] != l:
                continue
            assert match is None
            match = candidate
        return match

    def addEdge(self, n1, n2):
        """Add an edge from ``n1`` to ``n2`` (nodes or ids); both must already
        be part of the graph."""
        source = self._getNode(n1)
        target = self._getNode(n2)
        assert source in self.nodes
        assert target in self.nodes
        source.addEdge(target)

    def addNode(self, n):
        """Add node ``n`` to the graph."""
        self.nodes.add(n)

    def removeNode(self, n):
        """Remove node ``n`` (a node or id) along with every edge that other
        nodes have pointing at it."""
        victim = self._getNode(n)
        for other in self.nodes:
            if other == victim:
                continue
            other.edges = set(e for e in other.edges if e != victim)
        self.nodes.remove(victim)

    def toDot(self):
        """Render the graph as DOT text."""
        emitter = DotEmitter(self.name)
        for node in self.nodes:
            emitter.addNode(node)
            for dest in node.edges:
                emitter.addEdge(node, dest)
        return emitter.emit()

    @staticmethod
    def fromDot(str):
        """Parse DOT text into a new graph."""
        return DotReader().parse(str)

    @staticmethod
    def fromDotFile(fname):
        """Read the file ``fname`` and parse its contents as DOT text."""
        with open(fname, 'r') as f:
            contents = f.read()
            return DirectedGraph.fromDot(contents)

    def toDotFile(self, fname):
        """Write the DOT rendering of this graph to the file ``fname``."""
        with open(fname, 'w') as f:
            f.write(self.toDot())

    def __repr__(self):
        return self.toDot()
+
class BFS(object):
    """FIFO work queue with a visited set, for breadth-first traversal.

    ``start`` is stored for the caller's use only; it is not enqueued
    automatically.  An instance is truthy while nodes remain to be visited.
    """

    def __init__(self, start):
        self.visited = set()
        self.to_visit = []
        self.start = start

    def __nonzero__(self):
        return len(self.to_visit) != 0

    # Python 3 consults __bool__ and ignores __nonzero__; without this alias
    # an instance would always be truthy and ``while bfs:`` would never stop.
    __bool__ = __nonzero__

    def empty(self):
        """Return True when no nodes are waiting to be visited."""
        return len(self.to_visit) == 0

    def push_back(self, node):
        """Mark ``node`` visited and enqueue it; it must not have been seen."""
        assert node not in self.visited
        self.visited.add(node)
        self.to_visit.append(node)

    def maybe_push_back(self, node):
        """Enqueue ``node`` unless it has already been seen."""
        if node in self.visited:
            return
        self.push_back(node)

    def pop_front(self):
        """Remove and return the oldest queued node; the queue must be
        non-empty."""
        assert len(self.to_visit)
        return self.to_visit.pop(0)

    def seen(self, n):
        """Return True if ``n`` has ever been enqueued."""
        return n in self.visited
+
+
+
class CycleFinder(object):
    """Finds paths in a graph that lead from a node back to itself."""

    def __init__(self, graph):
        self.graph = graph

    def findCycleForNode(self, n):
        """Return the cycles through ``n`` found by a breadth-first search.

        Each result is a list of nodes: the path from ``n`` to a node that
        has an edge back to ``n``.  The list is empty when no such node is
        reachable.  ``n`` must belong to the graph.
        """
        assert n in self.graph.nodes
        all_paths = {n: [n]}   # node -> path by which it was first reached
        all_cycles = []
        bfs = BFS(n)
        bfs.push_back(n)
        # empty() is used rather than the queue's truthiness, which depends
        # on __nonzero__ / __bool__ and therefore on the Python version.
        while not bfs.empty():
            n = bfs.pop_front()
            assert n in all_paths
            for e in n.edges:
                en = self.graph.getNode(e)
                if not bfs.seen(en):
                    all_paths[en] = all_paths[n] + [en]
                    bfs.push_back(en)
                if en == bfs.start:
                    # An edge back to the start closes a cycle along the path
                    # that reached the current node.
                    all_cycles.append(all_paths[n])
        return all_cycles

    def findCyclesInGraph(self):
        """Return ``(node, cycles)`` for every node of the graph that lies on
        at least one cycle."""
        all_cycles = []
        for n in self.graph.nodes:
            cycle = self.findCycleForNode(n)
            if cycle:
                all_cycles.append((n, cycle))
        return all_cycles
diff --git a/libcxx/utils/libcxx/util.py b/libcxx/utils/libcxx/util.py
index 2fd95232abb4..8c93f392ed32 100644
--- a/libcxx/utils/libcxx/util.py
+++ b/libcxx/utils/libcxx/util.py
@@ -286,3 +286,16 @@ def executeCommandVerbose(cmd, *args, **kwargs):
report += "\n\nFailed!"
sys.stderr.write('%s\n' % report)
return out, err, exitCode
+
+
def executeCommandOrDie(cmd, *args, **kwargs):
    """
    Run a command; on a non-zero exit status write a report of its output to
    stderr and exit the process with that status.  On success return
    (out, err, exitCode).
    """
    out, err, exitCode = executeCommand(cmd, *args, **kwargs)
    if exitCode == 0:
        return out, err, exitCode
    report = makeReport(cmd, out, err, exitCode) + "\n\nFailed!"
    sys.stderr.write('%s\n' % report)
    sys.exit(exitCode)
    return out, err, exitCode
More information about the libcxx-commits
mailing list