[clang] 5b4f143 - [analyzer][tests] Introduce analyzer benchmarking framework
Valeriy Savchenko via cfe-commits
cfe-commits at lists.llvm.org
Tue Jul 14 01:43:19 PDT 2020
Author: Valeriy Savchenko
Date: 2020-07-14T11:42:46+03:00
New Revision: 5b4f143564502664a9d1197d6909047eab49530e
URL: https://github.com/llvm/llvm-project/commit/5b4f143564502664a9d1197d6909047eab49530e
DIFF: https://github.com/llvm/llvm-project/commit/5b4f143564502664a9d1197d6909047eab49530e.diff
LOG: [analyzer][tests] Introduce analyzer benchmarking framework
Summary:
This commit includes the following changes:
* Benchmark selected projects by analyzing them multiple times
* Compare two benchmarking results and visualize them on one chart
* Organize project build logging so that the same code can be reused
in benchmarks
Differential Revision: https://reviews.llvm.org/D83539
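For context, here is a hypothetical sketch (not part of the patch) of how the
new pieces fit together when driven directly from Python. The class and
function names come from the patch; the file names are invented for
illustration.

    # Hypothetical driver script -- a sketch only, not part of this patch.
    # Benchmark and compare come from the new SATestBenchmark.py below;
    # ProjectMap is the existing project map reader used by SATest.py.
    from SATestBenchmark import Benchmark, compare
    from ProjectMap import ProjectMap

    projects = ProjectMap().projects

    # Analyze every enabled project 20 times and store the raw time/memory
    # readings in a CSV file (this mirrors "SATest.py benchmark").
    Benchmark(projects, iterations=20, output_path="benchmark.csv").run()

    # After collecting a second run with a modified analyzer, plot both
    # distributions on one chart (mirrors "SATest.py benchmark compare").
    compare(old_path="benchmark.csv", new_path="benchmark-new.csv",
            plot_file="comparison.png")

The same flow is exposed on the command line through the new "benchmark" and
"benchmark compare" subcommands added to SATest.py below.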
Added:
clang/utils/analyzer/SATestBenchmark.py
Modified:
clang/utils/analyzer/SATest.py
clang/utils/analyzer/SATestBuild.py
clang/utils/analyzer/SATestUpdateDiffs.py
clang/utils/analyzer/requirements.txt
Removed:
################################################################################
diff --git a/clang/utils/analyzer/SATest.py b/clang/utils/analyzer/SATest.py
index 16f1dce0c584..46e636ad2895 100755
--- a/clang/utils/analyzer/SATest.py
+++ b/clang/utils/analyzer/SATest.py
@@ -34,29 +34,10 @@ def add(parser, args):
def build(parser, args):
import SATestBuild
- from ProjectMap import ProjectMap
SATestBuild.VERBOSE = args.verbose
- project_map = ProjectMap()
- projects = project_map.projects
-
- if args.projects:
- projects_arg = args.projects.split(",")
- available_projects = [project.name
- for project in projects]
-
- # validate that given projects are present in the project map file
- for manual_project in projects_arg:
- if manual_project not in available_projects:
- parser.error("Project '{project}' is not found in "
- "the project map file. Available projects are "
- "{all}.".format(project=manual_project,
- all=available_projects))
-
- projects = [project.with_fields(enabled=project.name in projects_arg)
- for project in projects]
-
+ projects = get_projects(parser, args.projects)
tester = SATestBuild.RegressionTester(args.jobs,
projects,
args.override_compiler,
@@ -100,6 +81,44 @@ def update(parser, args):
SATestUpdateDiffs.update_reference_results(project)
+def benchmark(parser, args):
+ from SATestBenchmark import Benchmark
+
+ projects = get_projects(parser, args.projects)
+ benchmark = Benchmark(projects, args.iterations, args.output)
+ benchmark.run()
+
+
+def benchmark_compare(parser, args):
+ import SATestBenchmark
+ SATestBenchmark.compare(args.old, args.new, args.output)
+
+
+def get_projects(parser, projects_str):
+ from ProjectMap import ProjectMap
+
+ project_map = ProjectMap()
+ projects = project_map.projects
+
+ if projects_str:
+ projects_arg = projects_str.split(",")
+ available_projects = [project.name
+ for project in projects]
+
+ # validate that given projects are present in the project map file
+ for manual_project in projects_arg:
+ if manual_project not in available_projects:
+ parser.error("Project '{project}' is not found in "
+ "the project map file. Available projects are "
+ "{all}.".format(project=manual_project,
+ all=available_projects))
+
+ projects = [project.with_fields(enabled=project.name in projects_arg)
+ for project in projects]
+
+ return projects
+
+
def docker(parser, args):
if len(args.rest) > 0:
if args.rest[0] != "--":
@@ -284,6 +303,36 @@ def main():
"to the docker's entrypoint.")
dock_parser.set_defaults(func=docker)
+ # benchmark subcommand
+ bench_parser = subparsers.add_parser(
+ "benchmark",
+ help="Run benchmarks by building a set of projects multiple times.")
+
+ bench_parser.add_argument("-i", "--iterations", action="store",
+ type=int, default=20,
+ help="Number of iterations for building each "
+ "project.")
+ bench_parser.add_argument("-o", "--output", action="store",
+ default="benchmark.csv",
+ help="Output csv file for the benchmark results")
+ bench_parser.add_argument("--projects", action="store", default="",
+ help="Comma-separated list of projects to test")
+ bench_parser.set_defaults(func=benchmark)
+
+ bench_subparsers = bench_parser.add_subparsers()
+ bench_compare_parser = bench_subparsers.add_parser(
+ "compare",
+ help="Compare benchmark runs.")
+ bench_compare_parser.add_argument("--old", action="store", required=True,
+ help="Benchmark reference results to "
+                                           "compare against.")
+ bench_compare_parser.add_argument("--new", action="store", required=True,
+ help="New benchmark results to check.")
+ bench_compare_parser.add_argument("-o", "--output",
+ action="store", required=True,
+ help="Output file for plots.")
+ bench_compare_parser.set_defaults(func=benchmark_compare)
+
args = parser.parse_args()
args.func(parser, args)
diff --git a/clang/utils/analyzer/SATestBenchmark.py b/clang/utils/analyzer/SATestBenchmark.py
new file mode 100644
index 000000000000..0fa2204bbbe7
--- /dev/null
+++ b/clang/utils/analyzer/SATestBenchmark.py
@@ -0,0 +1,158 @@
+"""
+Static Analyzer qualification infrastructure.
+
+This source file contains all the functionality related to benchmarking
+the analyzer on a set of projects. Right now, this includes measuring
+execution time and peak memory usage. The benchmark runs the analysis on
+every project multiple times to get a better picture of the distribution
+of the measured values.
+
+Additionally, this file includes a comparison routine for two benchmarking
+results that plots the results together on one chart.
+"""
+
+import SATestUtils as utils
+from SATestBuild import ProjectTester, stdout, TestInfo
+from ProjectMap import ProjectInfo
+
+import pandas as pd
+from typing import List, Tuple
+
+
+INDEX_COLUMN = "index"
+
+
+def _save(data: pd.DataFrame, file_path: str):
+ data.to_csv(file_path, index_label=INDEX_COLUMN)
+
+
+def _load(file_path: str) -> pd.DataFrame:
+ return pd.read_csv(file_path, index_col=INDEX_COLUMN)
+
+
+class Benchmark:
+ """
+    The Benchmark class encapsulates a single task: it runs the analysis
+ multiple times for the given set of projects and stores results in the
+ specified file.
+ """
+ def __init__(self, projects: List[ProjectInfo], iterations: int,
+ output_path: str):
+ self.projects = projects
+ self.iterations = iterations
+ self.out = output_path
+
+ def run(self):
+ results = [self._benchmark_project(project)
+ for project in self.projects]
+
+ data = pd.concat(results, ignore_index=True)
+ _save(data, self.out)
+
+ def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
+ if not project.enabled:
+ stdout(f" \n\n--- Skipping disabled project {project.name}\n")
+ return
+
+ stdout(f" \n\n--- Benchmarking project {project.name}\n")
+
+ test_info = TestInfo(project)
+ tester = ProjectTester(test_info, silent=True)
+ project_dir = tester.get_project_dir()
+ output_dir = tester.get_output_dir()
+
+ raw_data = []
+
+ for i in range(self.iterations):
+ stdout(f"Iteration #{i + 1}")
+ time, mem = tester.build(project_dir, output_dir)
+ raw_data.append({"time": time, "memory": mem,
+ "iteration": i, "project": project.name})
+ stdout(f"time: {utils.time_to_str(time)}, "
+ f"peak memory: {utils.memory_to_str(mem)}")
+
+ return pd.DataFrame(raw_data)
+
+
+def compare(old_path: str, new_path: str, plot_file: str):
+ """
+ Compare two benchmarking results stored as .csv files
+ and produce a plot in the specified file.
+ """
+ old = _load(old_path)
+ new = _load(new_path)
+
+ old_projects = set(old["project"])
+ new_projects = set(new["project"])
+ common_projects = old_projects & new_projects
+
+ # Leave only rows for projects common to both dataframes.
+ old = old[old["project"].isin(common_projects)]
+ new = new[new["project"].isin(common_projects)]
+
+ old, new = _normalize(old, new)
+
+ # Seaborn prefers all the data to be in one dataframe.
+ old["kind"] = "old"
+ new["kind"] = "new"
+ data = pd.concat([old, new], ignore_index=True)
+
+ # TODO: compare data in old and new dataframes using statistical tests
+ # to check if they belong to the same distribution
+ _plot(data, plot_file)
+
+
+def _normalize(old: pd.DataFrame,
+ new: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
+ # This creates a dataframe with all numerical data averaged.
+ means = old.groupby("project").mean()
+ return _normalize_impl(old, means), _normalize_impl(new, means)
+
+
+def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame):
+ # Right now 'means' has one row corresponding to one project,
+ # while 'data' has N rows for each project (one for each iteration).
+ #
+    # To make it easier to work with this data, we duplicate
+ # 'means' data to match the size of the 'data' dataframe.
+ #
+ # All the columns from 'data' will maintain their names, while
+ # new columns coming from 'means' will have "_mean" suffix.
+ joined_data = data.merge(means, on="project", suffixes=("", "_mean"))
+ _normalize_key(joined_data, "time")
+ _normalize_key(joined_data, "memory")
+ return joined_data
+
+
+def _normalize_key(data: pd.DataFrame, key: str):
+ norm_key = _normalized_name(key)
+ mean_key = f"{key}_mean"
+ data[norm_key] = data[key] / data[mean_key]
+
+
+def _normalized_name(name: str) -> str:
+ return f"normalized {name}"
+
+
+def _plot(data: pd.DataFrame, plot_file: str):
+ import matplotlib
+ import seaborn as sns
+ from matplotlib import pyplot as plt
+
+ sns.set_style("whitegrid")
+ # We want to have time and memory charts one above the other.
+ figure, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))
+
+ def _subplot(key: str, ax: matplotlib.axes.Axes):
+ sns.boxplot(x="project", y=_normalized_name(key), hue="kind",
+ data=data, palette=sns.color_palette("BrBG", 2), ax=ax)
+
+ _subplot("time", ax1)
+ # No need to have xlabels on both top and bottom charts.
+ ax1.set_xlabel("")
+
+ _subplot("memory", ax2)
+ # The legend on the top chart is enough.
+ ax2.get_legend().remove()
+
+ figure.savefig(plot_file)
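To make the normalization step above concrete, here is a minimal standalone
sketch. The project names and numbers are invented; only the column names and
the overall approach (divide every measurement by the per-project mean of the
old run, as _normalize and _normalize_impl do before plotting) match the patch.

    import pandas as pd

    # Invented sample data; the columns mirror what Benchmark.run() writes out.
    old = pd.DataFrame({"project": ["tmux", "tmux", "curl", "curl"],
                        "time": [10.0, 12.0, 40.0, 44.0],
                        "memory": [500, 520, 900, 940]})
    new = pd.DataFrame({"project": ["tmux", "tmux", "curl", "curl"],
                        "time": [9.0, 9.5, 41.0, 42.0],
                        "memory": [480, 490, 910, 915]})

    # Per-project means of the old run serve as the reference scale.
    means = old.groupby("project", as_index=False).mean(numeric_only=True)

    def normalize(run):
        # Attach the old-run means to every row and divide by them.
        joined = run.merge(means, on="project", suffixes=("", "_mean"))
        for key in ("time", "memory"):
            joined[f"normalized {key}"] = joined[key] / joined[f"{key}_mean"]
        return joined

    old_n, new_n = normalize(old), normalize(new)
    print(new_n[["project", "normalized time", "normalized memory"]])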
diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py
index eefab869f6ef..ed5c7379bb5b 100644
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@@ -87,10 +87,18 @@ def fileno(self) -> int:
return 0
-Logger = logging.getLogger("main")
LOCAL = threading.local()
-LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
-LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+
+
+def init_logger(name: str):
+ # TODO: use debug levels for VERBOSE messages
+ logger = logging.getLogger(name)
+ logger.setLevel(logging.DEBUG)
+ LOCAL.stdout = StreamToLogger(logger, logging.INFO)
+ LOCAL.stderr = StreamToLogger(logger, logging.ERROR)
+
+
+init_logger("main")
def stderr(message: str):
@@ -102,7 +110,6 @@ def stdout(message: str):
logging.basicConfig(
- level=logging.DEBUG,
format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
@@ -298,12 +305,13 @@ class ProjectTester:
"""
A component aggregating testing for one project.
"""
- def __init__(self, test_info: TestInfo):
+ def __init__(self, test_info: TestInfo, silent: bool = False):
self.project = test_info.project
self.override_compiler = test_info.override_compiler
self.extra_analyzer_config = test_info.extra_analyzer_config
self.is_reference_build = test_info.is_reference_build
self.strictness = test_info.strictness
+ self.silent = silent
def test(self) -> bool:
"""
@@ -312,20 +320,19 @@ def test(self) -> bool:
to the :param strictness: criteria.
"""
if not self.project.enabled:
- stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
+ self.out(
+ f" \n\n--- Skipping disabled project {self.project.name}\n")
return True
- stdout(f" \n\n--- Building project {self.project.name}\n")
+ self.out(f" \n\n--- Building project {self.project.name}\n")
start_time = time.time()
project_dir = self.get_project_dir()
- if VERBOSE >= 1:
- stdout(f" Build directory: {project_dir}.\n")
+ self.vout(f" Build directory: {project_dir}.\n")
# Set the build results directory.
output_dir = self.get_output_dir()
- output_dir = os.path.join(project_dir, output_dir)
self.build(project_dir, output_dir)
check_build(output_dir)
@@ -336,8 +343,8 @@ def test(self) -> bool:
else:
passed = run_cmp_results(project_dir, self.strictness)
- stdout(f"Completed tests for project {self.project.name} "
- f"(time: {time.time() - start_time:.2f}).\n")
+ self.out(f"Completed tests for project {self.project.name} "
+ f"(time: {time.time() - start_time:.2f}).\n")
return passed
@@ -346,22 +353,23 @@ def get_project_dir(self) -> str:
def get_output_dir(self) -> str:
if self.is_reference_build:
- return REF_PREFIX + OUTPUT_DIR_NAME
+ dirname = REF_PREFIX + OUTPUT_DIR_NAME
else:
- return OUTPUT_DIR_NAME
+ dirname = OUTPUT_DIR_NAME
+
+ return os.path.join(self.get_project_dir(), dirname)
- def build(self, directory: str, output_dir: str):
+ def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
build_log_path = get_build_log_path(output_dir)
- stdout(f"Log file: {build_log_path}\n")
- stdout(f"Output directory: {output_dir}\n")
+ self.out(f"Log file: {build_log_path}\n")
+ self.out(f"Output directory: {output_dir}\n")
remove_log_file(output_dir)
# Clean up scan build results.
if os.path.exists(output_dir):
- if VERBOSE >= 1:
- stdout(f" Removing old results: {output_dir}\n")
+ self.vout(f" Removing old results: {output_dir}\n")
shutil.rmtree(output_dir)
@@ -374,7 +382,7 @@ def build(self, directory: str, output_dir: str):
self._download_and_patch(directory, build_log_file)
run_cleanup_script(directory, build_log_file)
build_time, memory = self.scan_build(directory, output_dir,
- build_log_file)
+ build_log_file)
else:
build_time, memory = self.analyze_preprocessed(directory,
output_dir)
@@ -384,9 +392,11 @@ def build(self, directory: str, output_dir: str):
normalize_reference_results(directory, output_dir,
self.project.mode)
- stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
- f"peak memory: {utils.memory_to_str(memory)}). "
- f"See the log for more details: {build_log_path}\n")
+ self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
+ f"peak memory: {utils.memory_to_str(memory)}). "
+ f"See the log for more details: {build_log_path}\n")
+
+ return build_time, memory
def scan_build(self, directory: str, output_dir: str,
build_log_file: IO) -> Tuple[float, int]:
@@ -454,8 +464,7 @@ def scan_build(self, directory: str, output_dir: str,
command_to_run = command_prefix + command
- if VERBOSE >= 1:
- stdout(f" Executing: {command_to_run}\n")
+ self.vout(f" Executing: {command_to_run}\n")
time, mem = utils.check_and_measure_call(
command_to_run, cwd=cwd,
@@ -522,8 +531,7 @@ def analyze_preprocessed(self, directory: str,
log_path = os.path.join(fail_path, file_name + ".stderr.txt")
with open(log_path, "w+") as log_file:
try:
- if VERBOSE >= 1:
- stdout(f" Executing: {command}\n")
+ self.vout(f" Executing: {command}\n")
time, mem = utils.check_and_measure_call(
command, cwd=directory, stderr=log_file,
@@ -592,8 +600,10 @@ def _download(self, directory: str, build_log_file: IO):
f"for the '{self.project.name}' project")
def _download_from_git(self, directory: str, build_log_file: IO):
+ repo = self.project.origin
cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
- check_call(f"git clone --recursive {self.project.origin} {cached_source}",
+
+ check_call(f"git clone --recursive {repo} {cached_source}",
cwd=directory, stderr=build_log_file,
stdout=build_log_file, shell=True)
check_call(f"git checkout --quiet {self.project.commit}",
@@ -624,16 +634,15 @@ def _run_download_script(directory: str, build_log_file: IO):
out=LOCAL.stdout, err=LOCAL.stderr,
verbose=VERBOSE)
- @staticmethod
- def _apply_patch(directory: str, build_log_file: IO):
+ def _apply_patch(self, directory: str, build_log_file: IO):
patchfile_path = os.path.join(directory, PATCHFILE_NAME)
patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
if not os.path.exists(patchfile_path):
- stdout(" No local patches.\n")
+ self.out(" No local patches.\n")
return
- stdout(" Applying patch.\n")
+ self.out(" Applying patch.\n")
try:
check_call(f"patch -p1 < '{patchfile_path}'",
cwd=patched_source,
@@ -646,6 +655,14 @@ def _apply_patch(directory: str, build_log_file: IO):
f"See {build_log_file.name} for details.\n")
sys.exit(1)
+ def out(self, what: str):
+ if not self.silent:
+ stdout(what)
+
+ def vout(self, what: str):
+ if VERBOSE >= 1:
+ self.out(what)
+
class TestProjectThread(threading.Thread):
def __init__(self, tasks_queue: TestQueue,
@@ -668,10 +685,7 @@ def run(self):
while not self.tasks_queue.empty():
try:
test_info = self.tasks_queue.get()
-
- Logger = logging.getLogger(test_info.project.name)
- LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
- LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+ init_logger(test_info.project.name)
tester = ProjectTester(test_info)
if not tester.test():
diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py
index 1a2c41d2debf..920fa15e4c6f 100644
--- a/clang/utils/analyzer/SATestUpdateDiffs.py
+++ b/clang/utils/analyzer/SATestUpdateDiffs.py
@@ -21,10 +21,10 @@ def update_reference_results(project: ProjectInfo):
project_dir = tester.get_project_dir()
tester.is_reference_build = True
- ref_results_path = os.path.join(project_dir, tester.get_output_dir())
+ ref_results_path = tester.get_output_dir()
tester.is_reference_build = False
- created_results_path = os.path.join(project_dir, tester.get_output_dir())
+ created_results_path = tester.get_output_dir()
if not os.path.exists(created_results_path):
print("New results not found, was SATestBuild.py previously run?",
diff --git a/clang/utils/analyzer/requirements.txt b/clang/utils/analyzer/requirements.txt
index ec4f66929952..8ae8bc88ac19 100644
--- a/clang/utils/analyzer/requirements.txt
+++ b/clang/utils/analyzer/requirements.txt
@@ -1,4 +1,6 @@
graphviz
humanize
matplotlib
+pandas
psutil
+seaborn