[llvm] r341679 - utils/abtest: Refactor and add bisection method

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 7 10:08:44 PDT 2018


Author: matze
Date: Fri Sep  7 10:08:44 2018
New Revision: 341679

URL: http://llvm.org/viewvc/llvm-project?rev=341679&view=rev
Log:
utils/abtest: Refactor and add bisection method

- Refactor/rewrite most of the code. Also make sure it passes
  pycodestyle/pyflakes now
- Add a new mode that performs bisection on the search space. This
  should be faster in the common case where there is only a small number
  of files or functions actually leading to failure.
  The previous sequential behavior can still be accessed via `--seq`.

Modified:
    llvm/trunk/utils/abtest.py

Modified: llvm/trunk/utils/abtest.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/abtest.py?rev=341679&r1=341678&r2=341679&view=diff
==============================================================================
--- llvm/trunk/utils/abtest.py (original)
+++ llvm/trunk/utils/abtest.py Fri Sep  7 10:08:44 2018
@@ -3,9 +3,9 @@
 # Given a previous good compile narrow down miscompiles.
 # Expects two directories named "before" and "after" each containing a set of
 # assembly or object files where the "after" version is assumed to be broken.
-# You also have to provide a script called "link_test". It is called with a list
-# of files which should be linked together and result tested. "link_test" should
-# returns with exitcode 0 if the linking and testing succeeded.
+# You also have to provide a script called "link_test". It is called with a
+# list of files which should be linked together and result tested. "link_test"
+# should return with exitcode 0 if the linking and testing succeeded.
 #
 # abtest.py operates by taking all files from the "before" directory and
 # in each step replacing one of them with a file from the "bad" directory.
@@ -20,7 +20,7 @@
 #    1. Create a link_test script, make it executable. Simple Example:
 #          clang "$@" -o /tmp/test && /tmp/test || echo "PROBLEM"
 #    2. Run the script to figure out which files are miscompiled:
-#       > ./abtest.py 
+#       > ./abtest.py
 #       somefile.s: ok
 #       someotherfile.s: skipped: same content
 #       anotherfile.s: failed: './link_test' exitcode != 0
@@ -42,32 +42,142 @@ import os
 import subprocess
 import sys
 
-LINKTEST="./link_test"
-ESCAPE="\033[%sm"
-BOLD=ESCAPE % "1"
-RED=ESCAPE % "31"
-NORMAL=ESCAPE % "0"
-FAILED=RED+"failed"+NORMAL
+
+LINKTEST = "./link_test"
+ESCAPE = "\033[%sm"
+BOLD = ESCAPE % "1"
+RED = ESCAPE % "31"
+NORMAL = ESCAPE % "0"
+FAILED = RED + "failed" + NORMAL
+
 
 def find(dir, file_filter=None):
-    files = [walkdir[0]+"/"+file for walkdir in os.walk(dir) for file in walkdir[2]]
-    if file_filter != None:
+    files = [
+        walkdir[0]+"/"+file
+        for walkdir in os.walk(dir)
+        for file in walkdir[2]
+    ]
+    if file_filter is not None:
         files = filter(files, file_filter)
-    return files
+    return sorted(files)
+
 
 def error(message):
     stderr.write("Error: %s\n" % (message,))
 
+
 def warn(message):
     stderr.write("Warning: %s\n" % (message,))
 
+
+def info(message):
+    stderr.write("Info: %s\n" % (message,))
+
+
+def announce_test(name):
+    stderr.write("%s%s%s: " % (BOLD, name, NORMAL))
+    stderr.flush()
+
+
+def announce_result(result):
+    stderr.write(result)
+    stderr.write("\n")
+    stderr.flush()
+
+
+def format_namelist(l):
+    result = ", ".join(l[0:3])
+    if len(l) > 3:
+        result += "... (%d total)" % len(l)
+    return result
+
+
+def check_sanity(choices, perform_test):
+    announce_test("sanity check A")
+    all_a = {name: a_b[0] for name, a_b in choices}
+    res_a = perform_test(all_a)
+    if res_a is not True:
+        error("Picking all choices from A failed to pass the test")
+        sys.exit(1)
+
+    announce_test("sanity check B (expecting failure)")
+    all_b = {name: a_b[1] for name, a_b in choices}
+    res_b = perform_test(all_b)
+    if res_b is not False:
+        error("Picking all choices from B did unexpectedly pass the test")
+        sys.exit(1)
+
+
+def check_sequentially(choices, perform_test):
+    known_good = set()
+    all_a = {name: a_b[0] for name, a_b in choices}
+    n = 1
+    for name, a_b in sorted(choices):
+        picks = dict(all_a)
+        picks[name] = a_b[1]
+        announce_test("checking %s [%d/%d]" % (name, n, len(choices)))
+        n += 1
+        res = perform_test(picks)
+        if res is True:
+            known_good.add(name)
+    return known_good
+
+
+def check_bisect(choices, perform_test):
+    known_good = set()
+    if len(choices) == 0:
+        return known_good
+
+    choice_map = dict(choices)
+    all_a = {name: a_b[0] for name, a_b in choices}
+
+    def test_partition(partition, upcoming_partition):
+        # Compute the maximum number of checks we have to do in the worst case.
+        max_remaining_steps = len(partition) * 2 - 1
+        if upcoming_partition is not None:
+            max_remaining_steps += len(upcoming_partition) * 2 - 1
+        for x in partitions_to_split:
+            max_remaining_steps += (len(x) - 1) * 2
+
+        picks = dict(all_a)
+        for x in partition:
+            picks[x] = choice_map[x][1]
+        announce_test("checking %s [<=%d remaining]" %
+                      (format_namelist(partition), max_remaining_steps))
+        res = perform_test(picks)
+        if res is True:
+            known_good.update(partition)
+        elif len(partition) > 1:
+            partitions_to_split.insert(0, partition)
+
+    # TODO:
+    # - We could optimize based on the knowledge that when splitting a failed
+    #   partition into two and one side checks out okay then we can deduce that
+    #   the other partition must be a failure.
+    all_choice_names = [name for name, _ in choices]
+    partitions_to_split = [all_choice_names]
+    while len(partitions_to_split) > 0:
+        partition = partitions_to_split.pop()
+
+        middle = len(partition) // 2
+        left = partition[0:middle]
+        right = partition[middle:]
+
+        if len(left) > 0:
+            test_partition(left, right)
+        assert len(right) > 0
+        test_partition(right, None)
+
+    return known_good
+
+
 def extract_functions(file):
     functions = []
     in_function = None
     for line in open(file):
         marker = line.find(" -- Begin function ")
         if marker != -1:
-            if in_function != None:
+            if in_function is not None:
                 warn("Missing end of function %s" % (in_function,))
             funcname = line[marker + 19:-1]
             in_function = funcname
@@ -77,27 +187,28 @@ def extract_functions(file):
         marker = line.find(" -- End function")
         if marker != -1:
             text += line
-            functions.append( (in_function, text) )
+            functions.append((in_function, text))
             in_function = None
             continue
 
-        if in_function != None:
+        if in_function is not None:
             text += line
     return functions
 
-def replace_function(file, function, replacement, dest):
+
+def replace_functions(source, dest, replacements):
     out = open(dest, "w")
     skip = False
-    found = False
     in_function = None
-    for line in open(file):
+    for line in open(source):
         marker = line.find(" -- Begin function ")
         if marker != -1:
-            if in_function != None:
+            if in_function is not None:
                 warn("Missing end of function %s" % (in_function,))
             funcname = line[marker + 19:-1]
             in_function = funcname
-            if in_function == function:
+            replacement = replacements.get(in_function)
+            if replacement is not None:
                 out.write(replacement)
                 skip = True
         else:
@@ -111,122 +222,164 @@ def replace_function(file, function, rep
         if not skip:
             out.write(line)
 
-def announce_test(name):
-    stderr.write("%s%s%s: " % (BOLD, name, NORMAL))
-    stderr.flush()
-
-def announce_result(result, info):
-    stderr.write(result)
-    if info != "":
-        stderr.write(": %s" % info)
-    stderr.write("\n")
-    stderr.flush()
 
 def testrun(files):
-    linkline="%s %s" % (LINKTEST, " ".join(files),)
+    linkline = "%s %s" % (LINKTEST, " ".join(files),)
     res = subprocess.call(linkline, shell=True)
     if res != 0:
-        announce_result(FAILED, "'%s' exitcode != 0" % LINKTEST)
+        announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST)
         return False
     else:
-        announce_result("ok", "")
+        announce_result("ok")
         return True
 
-def check_files():
-    """Check files mode"""
-    for i in range(0, len(NO_PREFIX)):
-        f = NO_PREFIX[i]
-        b=baddir+"/"+f
-        if b not in BAD_FILES:
-            warn("There is no corresponding file to '%s' in %s" \
-                 % (gooddir+"/"+f, baddir))
-            continue
-
-        announce_test(f + " [%s/%s]" % (i+1, len(NO_PREFIX)))
-
-        # combine files (everything from good except f)
-        testfiles=[]
-        skip=False
-        for c in NO_PREFIX:
-            badfile = baddir+"/"+c
-            goodfile = gooddir+"/"+c
-            if c == f:
-                testfiles.append(badfile)
-                if filecmp.cmp(goodfile, badfile):
-                    announce_result("skipped", "same content")
-                    skip = True
-                    break
+
+def prepare_files(gooddir, baddir):
+    files_a = find(gooddir, "*")
+    files_b = find(baddir, "*")
+
+    basenames_a = set(map(os.path.basename, files_a))
+    basenames_b = set(map(os.path.basename, files_b))
+
+    for name in files_b:
+        basename = os.path.basename(name)
+        if basename not in basenames_a:
+            warn("There is no corresponding file to '%s' in %s" %
+                 (name, gooddir))
+    choices = []
+    skipped = []
+    for name in files_a:
+        basename = os.path.basename(name)
+        if basename not in basenames_b:
+            warn("There is no corresponding file to '%s' in %s" %
+                 (name, baddir))
+
+        file_a = gooddir + "/" + basename
+        file_b = baddir + "/" + basename
+        if filecmp.cmp(file_a, file_b):
+            skipped.append(basename)
+            continue
+
+        choice = (basename, (file_a, file_b))
+        choices.append(choice)
+
+    if len(skipped) > 0:
+        info("Skipped (same content): %s" % format_namelist(skipped))
+
+    def perform_test(picks):
+        files = []
+        # Note that we iterate over files_a so we don't change the order
+        # (cannot use `picks` as it is a dictionary without order)
+        for x in files_a:
+            basename = os.path.basename(x)
+            picked = picks.get(basename)
+            if picked is None:
+                assert basename in skipped
+                files.append(x)
             else:
-                testfiles.append(goodfile)
-        if skip:
+                files.append(picked)
+        return testrun(files)
+
+    return perform_test, choices
+
+
+def prepare_functions(to_check, gooddir, goodfile, badfile):
+    files_good = find(gooddir, "*")
+
+    functions_a = extract_functions(goodfile)
+    functions_a_map = dict(functions_a)
+    functions_b_map = dict(extract_functions(badfile))
+
+    for name in functions_b_map.keys():
+        if name not in functions_a_map:
+            warn("Function '%s' missing from good file" % name)
+    choices = []
+    skipped = []
+    for name, candidate_a in functions_a:
+        candidate_b = functions_b_map.get(name)
+        if candidate_b is None:
+            warn("Function '%s' missing from bad file" % name)
             continue
-        testrun(testfiles)
+        if candidate_a == candidate_b:
+            skipped.append(name)
+            continue
+        choice = name, (candidate_a, candidate_b)
+        choices.append(choice)
+
+    if len(skipped) > 0:
+        info("Skipped (same content): %s" % format_namelist(skipped))
+
+    combined_file = '/tmp/combined2.s'
+    files = []
+    found_good_file = False
+    for c in files_good:
+        if os.path.basename(c) == to_check:
+            found_good_file = True
+            files.append(combined_file)
+            continue
+        files.append(c)
+    assert found_good_file
+
+    def perform_test(picks):
+        for name, x in picks.items():
+            assert x == functions_a_map[name] or x == functions_b_map[name]
+        replace_functions(goodfile, combined_file, picks)
+        return testrun(files)
+    return perform_test, choices
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--a', dest='dir_a', default='before')
+    parser.add_argument('--b', dest='dir_b', default='after')
+    parser.add_argument('--insane', help='Skip sanity check',
+                        action='store_true')
+    parser.add_argument('--seq',
+                        help='Check sequentially instead of bisection',
+                        action='store_true')
+    parser.add_argument('file', metavar='file', nargs='?')
+    config = parser.parse_args()
+
+    gooddir = config.dir_a
+    baddir = config.dir_b
+
+    # Preparation phase: Creates a dictionary mapping names to a list of two
+    # choices each. The bisection algorithm will pick one choice for each name
+    # and then run the perform_test function on it.
+    if config.file is not None:
+        goodfile = gooddir + "/" + config.file
+        badfile = baddir + "/" + config.file
+        perform_test, choices = prepare_functions(config.file, gooddir,
+                                                  goodfile, badfile)
+    else:
+        perform_test, choices = prepare_files(gooddir, baddir)
+
+    info("%d bisection choices" % len(choices))
+
+    # "Checking whether build environment is sane ..."
+    if not config.insane:
+        if not os.access(LINKTEST, os.X_OK):
+            error("Expect '%s' to be present and executable" % (LINKTEST,))
+            exit(1)
+
+        check_sanity(choices, perform_test)
+
+    if config.seq:
+        known_good = check_sequentially(choices, perform_test)
+    else:
+        known_good = check_bisect(choices, perform_test)
+
+    stderr.write("")
+    if len(known_good) != len(choices):
+        stderr.write("== Failing ==\n")
+        for name, _ in choices:
+            if name not in known_good:
+                stderr.write("%s\n" % name)
+    else:
+        # This shouldn't happen when the sanity check works...
+        # Maybe link_test isn't deterministic?
+        stderr.write("Could not identify failing parts?!?")
 
-def check_functions_in_file(base, goodfile, badfile):
-    functions = extract_functions(goodfile)
-    if len(functions) == 0:
-        warn("Couldn't find any function in %s, missing annotations?" % (goodfile,))
-        return
-    badfunctions = dict(extract_functions(badfile))
-    if len(functions) == 0:
-        warn("Couldn't find any function in %s, missing annotations?" % (badfile,))
-        return
-
-    COMBINED="/tmp/combined.s"
-    i = 0
-    for (func,func_text) in functions:
-        announce_test(func + " [%s/%s]" % (i+1, len(functions)))
-        i+=1
-        if func not in badfunctions:
-            warn("Function '%s' missing from bad file" % func)
-            continue
-        if badfunctions[func] == func_text:
-            announce_result("skipped", "same content")
-            continue
-        replace_function(goodfile, func, badfunctions[func], COMBINED)
-        testfiles=[]
-        for c in NO_PREFIX:
-            if c == base:
-                testfiles.append(COMBINED)
-                continue
-            testfiles.append(gooddir + "/" + c)
-
-        testrun(testfiles)
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--a', dest='dir_a', default='before')
-parser.add_argument('--b', dest='dir_b', default='after')
-parser.add_argument('--insane', help='Skip sanity check', action='store_true')
-parser.add_argument('file', metavar='file', nargs='?')
-config = parser.parse_args()
-
-gooddir=config.dir_a
-baddir=config.dir_b
-
-BAD_FILES=find(baddir, "*")
-GOOD_FILES=find(gooddir, "*")
-NO_PREFIX=sorted([x[len(gooddir)+1:] for x in GOOD_FILES])
-
-# "Checking whether build environment is sane ..."
-if not config.insane:
-    announce_test("sanity check")
-    if not os.access(LINKTEST, os.X_OK):
-        error("Expect '%s' to be present and executable" % (LINKTEST,))
-        exit(1)
-
-    res = testrun(GOOD_FILES)
-    if not res:
-        # "build environment is grinning and holding a spatula. Guess not."
-        linkline="%s %s" % (LINKTEST, " ".join(GOOD_FILES),)
-        stderr.write("\n%s\n\n" % linkline)
-        stderr.write("Returned with exitcode != 0\n")
-        sys.exit(1)
 
-if config.file is not None:
-    # File exchange mode
-    goodfile = gooddir+"/"+config.file
-    badfile = baddir+"/"+config.file
-    check_functions_in_file(config.file, goodfile, badfile)
-else:
-    # Function exchange mode
-    check_files()
+if __name__ == '__main__':
+    main()




More information about the llvm-commits mailing list