[llvm] r341120 - Add a utility script to stress test the demangler.
Zachary Turner via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 30 13:53:48 PDT 2018
Author: zturner
Date: Thu Aug 30 13:53:48 2018
New Revision: 341120
URL: http://llvm.org/viewvc/llvm-project?rev=341120&view=rev
Log:
Add a utility script to stress test the demangler.
Added:
llvm/trunk/utils/demangle_tree.py
Added: llvm/trunk/utils/demangle_tree.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/demangle_tree.py?rev=341120&view=auto
==============================================================================
--- llvm/trunk/utils/demangle_tree.py (added)
+++ llvm/trunk/utils/demangle_tree.py Thu Aug 30 13:53:48 2018
@@ -0,0 +1,226 @@
+# Given a path to llvm-objdump and a directory tree, spider the directory tree
+# dumping every object file encountered with correct options needed to demangle
+# symbols in the object file, and collect statistics about failed / crashed
+# demanglings. Useful for stress testing the demangler against a large corpus
+# of inputs.
+
+import argparse
+import functools
+import os
+import re
+import sys
+import subprocess
+import traceback
+from multiprocessing import Pool
+import multiprocessing
+
# Parsed command-line namespace; populated under __main__ and read by go().
args = None
+
def parse_line(line):
    """Extract (mangled, demangled) from one llvm-objdump -demangle line.

    Expects lines of the shape `... ?mangled(demangled) ...` where the
    mangled name starts at the first '?' (MSVC mangling) and the demangled
    form follows in parentheses.  Returns (None, None) for lines that do
    not match this shape.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(')', open_paren)
    # Bug fix: the original re-tested `open_paren == -1` here, so a line
    # containing '(' but no closing ')' slipped through with
    # close_paren == -1 and produced a truncated, bogus pair.
    if close_paren == -1:
        return None, None
    mangled = line[question : open_paren]
    demangled = line[open_paren+1 : close_paren]
    return mangled.strip(), demangled.strip()
+
class Result(object):
    """Accumulated demangling statistics for one file or one directory."""

    def __init__(self):
        # Path (file or directory) this result describes.
        self.file = None
        # Number of object files folded into this result.
        self.nfiles = 0
        # Total number of symbols inspected.
        self.nsymbols = 0
        # Mangled names that the demangler rejected.
        self.errors = set()
        # Paths of object files whose objdump invocation died.
        self.crashed = []
+
class MapContext(object):
    """Mutable bookkeeping carried across successive chunked map passes."""

    def __init__(self):
        # Result for a directory whose files straddled a chunk boundary;
        # it keeps accumulating in the next chunk.  None when no carry-over.
        self.rincomplete = None
        # Running totals over every directory completed so far.
        self.rcumulative = Result()
        # Queue of (directory, [object file paths]) awaiting processing.
        self.pending_objs = []
        # Count of object files currently queued in pending_objs.
        self.npending = 0
+
def process_file(path, objdump):
    """Run llvm-objdump on one object file and tally demangling outcomes.

    Returns a Result recording the symbol count, the set of symbols the
    demangler rejected, and whether the tool itself crashed on this file.
    """
    result = Result()
    result.file = path

    cmd = [objdump, '-t', '-demangle', path]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, _ = proc.communicate()
    if proc.returncode != 0:
        # A non-zero exit is treated as a crash while demangling this file.
        result.crashed = [result.file]
        return result

    for line in stdout.decode('utf-8').splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        result.nsymbols += 1
        if "invalid mangled name" in demangled:
            result.errors.add(mangled)
    return result
+
def add_results(r1, r2):
    """Fold the statistics gathered in r2 into r1, mutating r1 in place."""
    for crashed_path in r2.crashed:
        r1.crashed.append(crashed_path)
    r1.errors |= r2.errors
    r1.nsymbols = r1.nsymbols + r2.nsymbols
    r1.nfiles = r1.nfiles + r2.nfiles
+
def print_result_row(directory, result):
    """Print a one-line summary of the statistics for one directory."""
    row = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, len(result.crashed), len(result.errors),
        result.nsymbols, directory)
    print(row)
+
def process_one_chunk(pool, chunk_size, objdump, context):
    """Pull up to chunk_size object files off the pending queue, demangle
    them in parallel via `pool`, and print a summary row for every
    directory that fully completes in this chunk.

    A directory whose files straddle the chunk boundary is carried over in
    context.rincomplete so its statistics keep accumulating next chunk.
    """
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        # Bug fix: this local was named `re`, shadowing the `re` module
        # imported at file scope; renamed for clarity and safety.
        dir_result = Result()
        if context.rincomplete is not None:
            # Resume accumulating into the directory left unfinished by
            # the previous chunk.
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert(len(objs) == chunk_size or context.npending == 0)

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        completed = dir_results[c]
        add_results(context.rcumulative, completed)
        print_result_row(c, completed)
+
def process_pending_files(pool, chunk_size, objdump, context):
    """Drain full chunks until fewer than chunk_size files remain queued."""
    while not context.npending < chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)
+
def go():
    """Crawl args.dir, demangle every matching object file, print stats.

    Walks the tree directory by directory, batching object files into
    pool-sized chunks for parallel processing, then prints per-directory
    rows followed by an overall summary.
    """
    global args

    obj_dir = args.dir
    extensions = args.extensions.split(',')
    # Normalize so both `o` and `.o` spellings are accepted.
    extensions = [x if x[0] == '.' else '.' + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                # Renamed from `file`, which shadowed the builtin.
                _, ext = os.path.splitext(f)
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        # At most one partial chunk can remain; flush it.
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print(" " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print(" " + f)
        print("Summary:")
        # Bug fix: guard against ZeroDivisionError when the crawl found
        # no symbols or no files at all.
        spct = float(nsuccess) / total.nsymbols if total.nsymbols else 0.0
        fpct = float(nfailed) / total.nsymbols if total.nsymbols else 0.0
        cpct = float(ncrashed) / nfiles if nfiles else 0.0
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except Exception:
        # Bug fix: narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate instead of being swallowed.
        traceback.print_exc()

    pool.close()
    pool.join()
+
+if __name__ == "__main__":
+ def_obj = 'obj' if sys.platform == 'win32' else 'o'
+
+ parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
+ parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
+ parser.add_argument('--objdump', type=str, help='path to llvm-objdump. If not specified ' +
+ 'the tool is located as if by `which llvm-objdump`.')
+ parser.add_argument('--extensions', type=str, default=def_obj,
+ help='comma separated list of extensions to demangle (e.g. `o,obj`). ' +
+ 'By default this will be `obj` on Windows and `o` otherwise.')
+
+ args = parser.parse_args()
+
+
+ multiprocessing.freeze_support()
+ go()
+
More information about the llvm-commits
mailing list