[clang] 5b4f143 - [analyzer][tests] Introduce analyzer benchmarking framework
Valeriy Savchenko via cfe-commits
cfe-commits at lists.llvm.org
Tue Jul 14 01:43:19 PDT 2020
Author: Valeriy Savchenko
Date: 2020-07-14T11:42:46+03:00
New Revision: 5b4f143564502664a9d1197d6909047eab49530e
URL: https://github.com/llvm/llvm-project/commit/5b4f143564502664a9d1197d6909047eab49530e
DIFF: https://github.com/llvm/llvm-project/commit/5b4f143564502664a9d1197d6909047eab49530e.diff
LOG: [analyzer][tests] Introduce analyzer benchmarking framework
Summary:
This commit includes the following changes:
* Benchmark selected projects by analyzing them multiple times
* Compare two benchmarking results and visualize them on one chart
* Organize project build logging so that the same code can be reused
in benchmarks
Differential Revision: https://reviews.llvm.org/D83539
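For context, here is a hypothetical sketch (not part of the patch) of how the
new pieces fit together when driven directly from Python. The class and
function names come from the patch; the file names are invented for
illustration.

    # Hypothetical driver script -- a sketch only, not part of this patch.
    # Benchmark and compare come from the new SATestBenchmark.py below;
    # ProjectMap is the existing project map reader used by SATest.py.
    from SATestBenchmark import Benchmark, compare
    from ProjectMap import ProjectMap

    projects = ProjectMap().projects

    # Analyze every enabled project 20 times and store the raw time/memory
    # readings in a CSV file (this mirrors "SATest.py benchmark").
    Benchmark(projects, iterations=20, output_path="benchmark.csv").run()

    # After collecting a second run with a modified analyzer, plot both
    # distributions on one chart (mirrors "SATest.py benchmark compare").
    compare(old_path="benchmark.csv", new_path="benchmark-new.csv",
            plot_file="comparison.png")

The same flow is exposed on the command line through the new "benchmark" and
"benchmark compare" subcommands added to SATest.py below.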
Added:
clang/utils/analyzer/SATestBenchmark.py
Modified:
clang/utils/analyzer/SATest.py
clang/utils/analyzer/SATestBuild.py
clang/utils/analyzer/SATestUpdateDiffs.py
clang/utils/analyzer/requirements.txt
Removed:
################################################################################
diff --git a/clang/utils/analyzer/SATest.py b/clang/utils/analyzer/SATest.py
index 16f1dce0c584..46e636ad2895 100755
--- a/clang/utils/analyzer/SATest.py
+++ b/clang/utils/analyzer/SATest.py
@@ -34,29 +34,10 @@ def add(parser, args):
def build(parser, args):
import SATestBuild
- from ProjectMap import ProjectMap
SATestBuild.VERBOSE = args.verbose
- project_map = ProjectMap()
- projects = project_map.projects
-
- if args.projects:
- projects_arg = args.projects.split(",")
- available_projects = [project.name
- for project in projects]
-
- # validate that given projects are present in the project map file
- for manual_project in projects_arg:
- if manual_project not in available_projects:
- parser.error("Project '{project}' is not found in "
- "the project map file. Available projects are "
- "{all}.".format(project=manual_project,
- all=available_projects))
-
- projects = [project.with_fields(enabled=project.name in projects_arg)
- for project in projects]
-
+ projects = get_projects(parser, args.projects)
tester = SATestBuild.RegressionTester(args.jobs,
projects,
args.override_compiler,
@@ -100,6 +81,44 @@ def update(parser, args):
SATestUpdateDiffs.update_reference_results(project)
+def benchmark(parser, args):
+ from SATestBenchmark import Benchmark
+
+ projects = get_projects(parser, args.projects)
+ benchmark = Benchmark(projects, args.iterations, args.output)
+ benchmark.run()
+
+
+def benchmark_compare(parser, args):
+ import SATestBenchmark
+ SATestBenchmark.compare(args.old, args.new, args.output)
+
+
+def get_projects(parser, projects_str):
+ from ProjectMap import ProjectMap
+
+ project_map = ProjectMap()
+ projects = project_map.projects
+
+ if projects_str:
+ projects_arg = projects_str.split(",")
+ available_projects = [project.name
+ for project in projects]
+
+ # validate that given projects are present in the project map file
+ for manual_project in projects_arg:
+ if manual_project not in available_projects:
+ parser.error("Project '{project}' is not found in "
+ "the project map file. Available projects are "
+ "{all}.".format(project=manual_project,
+ all=available_projects))
+
+ projects = [project.with_fields(enabled=project.name in projects_arg)
+ for project in projects]
+
+ return projects
+
+
def docker(parser, args):
if len(args.rest) > 0:
if args.rest[0] != "--":
@@ -284,6 +303,36 @@ def main():
"to the docker's entrypoint.")
dock_parser.set_defaults(func=docker)
+ # benchmark subcommand
+ bench_parser = subparsers.add_parser(
+ "benchmark",
+ help="Run benchmarks by building a set of projects multiple times.")
+
+ bench_parser.add_argument("-i", "--iterations", action="store",
+ type=int, default=20,
+ help="Number of iterations for building each "
+ "project.")
+ bench_parser.add_argument("-o", "--output", action="store",
+ default="benchmark.csv",
+ help="Output csv file for the benchmark results")
+ bench_parser.add_argument("--projects", action="store", default="",
+ help="Comma-separated list of projects to test")
+ bench_parser.set_defaults(func=benchmark)
+
+ bench_subparsers = bench_parser.add_subparsers()
+ bench_compare_parser = bench_subparsers.add_parser(
+ "compare",
+ help="Compare benchmark runs.")
+ bench_compare_parser.add_argument("--old", action="store", required=True,
+ help="Benchmark reference results to "
+                                           "compare against.")
+ bench_compare_parser.add_argument("--new", action="store", required=True,
+ help="New benchmark results to check.")
+ bench_compare_parser.add_argument("-o", "--output",
+ action="store", required=True,
+ help="Output file for plots.")
+ bench_compare_parser.set_defaults(func=benchmark_compare)
+
args = parser.parse_args()
args.func(parser, args)
diff --git a/clang/utils/analyzer/SATestBenchmark.py b/clang/utils/analyzer/SATestBenchmark.py
new file mode 100644
index 000000000000..0fa2204bbbe7
--- /dev/null
+++ b/clang/utils/analyzer/SATestBenchmark.py
@@ -0,0 +1,158 @@
+"""
+Static Analyzer qualification infrastructure.
+
+This source file contains all the functionality related to benchmarking
+the analyzer on a set of projects. Right now, this includes measuring
+execution time and peak memory usage. The benchmark runs the analysis on
+every project multiple times to get a better picture of the distribution
+of the measured values.
+
+Additionally, this file includes a comparison routine for two benchmarking
+results that plots the results together on one chart.
+"""
+
+import SATestUtils as utils
+from SATestBuild import ProjectTester, stdout, TestInfo
+from ProjectMap import ProjectInfo
+
+import pandas as pd
+from typing import List, Tuple
+
+
+INDEX_COLUMN = "index"
+
+
+def _save(data: pd.DataFrame, file_path: str):
+ data.to_csv(file_path, index_label=INDEX_COLUMN)
+
+
+def _load(file_path: str) -> pd.DataFrame:
+ return pd.read_csv(file_path, index_col=INDEX_COLUMN)
+
+
+class Benchmark:
+ """
+    The Benchmark class encapsulates a single task: it runs the analysis
+ multiple times for the given set of projects and stores results in the
+ specified file.
+ """
+ def __init__(self, projects: List[ProjectInfo], iterations: int,
+ output_path: str):
+ self.projects = projects
+ self.iterations = iterations
+ self.out = output_path
+
+ def run(self):
+ results = [self._benchmark_project(project)
+ for project in self.projects]
+
+ data = pd.concat(results, ignore_index=True)
+ _save(data, self.out)
+
+ def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
+ if not project.enabled:
+ stdout(f" \n\n--- Skipping disabled project {project.name}\n")
+ return
+
+ stdout(f" \n\n--- Benchmarking project {project.name}\n")
+
+ test_info = TestInfo(project)
+ tester = ProjectTester(test_info, silent=True)
+ project_dir = tester.get_project_dir()
+ output_dir = tester.get_output_dir()
+
+ raw_data = []
+
+ for i in range(self.iterations):
+ stdout(f"Iteration #{i + 1}")
+ time, mem = tester.build(project_dir, output_dir)
+ raw_data.append({"time": time, "memory": mem,
+ "iteration": i, "project": project.name})
+ stdout(f"time: {utils.time_to_str(time)}, "
+ f"peak memory: {utils.memory_to_str(mem)}")
+
+ return pd.DataFrame(raw_data)
+
+
+def compare(old_path: str, new_path: str, plot_file: str):
+ """
+ Compare two benchmarking results stored as .csv files
+ and produce a plot in the specified file.
+ """
+ old = _load(old_path)
+ new = _load(new_path)
+
+ old_projects = set(old["project"])
+ new_projects = set(new["project"])
+ common_projects = old_projects & new_projects
+
+ # Leave only rows for projects common to both dataframes.
+ old = old[old["project"].isin(common_projects)]
+ new = new[new["project"].isin(common_projects)]
+
+ old, new = _normalize(old, new)
+
+ # Seaborn prefers all the data to be in one dataframe.
+ old["kind"] = "old"
+ new["kind"] = "new"
+ data = pd.concat([old, new], ignore_index=True)
+
+ # TODO: compare data in old and new dataframes using statistical tests
+ # to check if they belong to the same distribution
+ _plot(data, plot_file)
+
+
+def _normalize(old: pd.DataFrame,
+ new: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
+ # This creates a dataframe with all numerical data averaged.
+ means = old.groupby("project").mean()
+ return _normalize_impl(old, means), _normalize_impl(new, means)
+
+
+def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame):
+ # Right now 'means' has one row corresponding to one project,
+ # while 'data' has N rows for each project (one for each iteration).
+ #
+    # To make it easier to work with this data, we duplicate
+ # 'means' data to match the size of the 'data' dataframe.
+ #
+ # All the columns from 'data' will maintain their names, while
+ # new columns coming from 'means' will have "_mean" suffix.
+ joined_data = data.merge(means, on="project", suffixes=("", "_mean"))
+ _normalize_key(joined_data, "time")
+ _normalize_key(joined_data, "memory")
+ return joined_data
+
+
+def _normalize_key(data: pd.DataFrame, key: str):
+ norm_key = _normalized_name(key)
+ mean_key = f"{key}_mean"
+ data[norm_key] = data[key] / data[mean_key]
+
+
+def _normalized_name(name: str) -> str:
+ return f"normalized {name}"
+
+
+def _plot(data: pd.DataFrame, plot_file: str):
+ import matplotlib
+ import seaborn as sns
+ from matplotlib import pyplot as plt
+
+ sns.set_style("whitegrid")
+ # We want to have time and memory charts one above the other.
+ figure, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))
+
+ def _subplot(key: str, ax: matplotlib.axes.Axes):
+ sns.boxplot(x="project", y=_normalized_name(key), hue="kind",
+ data=data, palette=sns.color_palette("BrBG", 2), ax=ax)
+
+ _subplot("time", ax1)
+ # No need to have xlabels on both top and bottom charts.
+ ax1.set_xlabel("")
+
+ _subplot("memory", ax2)
+ # The legend on the top chart is enough.
+ ax2.get_legend().remove()
+
+ figure.savefig(plot_file)
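To make the normalization step above concrete, here is a minimal standalone
sketch. The project names and numbers are invented; only the column names and
the overall approach (divide every measurement by the per-project mean of the
old run, as _normalize and _normalize_impl do before plotting) match the patch.

    import pandas as pd

    # Invented sample data; the columns mirror what Benchmark.run() writes out.
    old = pd.DataFrame({"project": ["tmux", "tmux", "curl", "curl"],
                        "time": [10.0, 12.0, 40.0, 44.0],
                        "memory": [500, 520, 900, 940]})
    new = pd.DataFrame({"project": ["tmux", "tmux", "curl", "curl"],
                        "time": [9.0, 9.5, 41.0, 42.0],
                        "memory": [480, 490, 910, 915]})

    # Per-project means of the old run serve as the reference scale.
    means = old.groupby("project", as_index=False).mean(numeric_only=True)

    def normalize(run):
        # Attach the old-run means to every row and divide by them.
        joined = run.merge(means, on="project", suffixes=("", "_mean"))
        for key in ("time", "memory"):
            joined[f"normalized {key}"] = joined[key] / joined[f"{key}_mean"]
        return joined

    old_n, new_n = normalize(old), normalize(new)
    print(new_n[["project", "normalized time", "normalized memory"]])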
diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py
index eefab869f6ef..ed5c7379bb5b 100644
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@@ -87,10 +87,18 @@ def fileno(self) -> int:
return 0
-Logger = logging.getLogger("main")
LOCAL = threading.local()
-LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
-LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+
+
+def init_logger(name: str):
+ # TODO: use debug levels for VERBOSE messages
+ logger = logging.getLogger(name)
+ logger.setLevel(logging.DEBUG)
+ LOCAL.stdout = StreamToLogger(logger, logging.INFO)
+ LOCAL.stderr = StreamToLogger(logger, logging.ERROR)
+
+
+init_logger("main")
def stderr(message: str):
@@ -102,7 +110,6 @@ def stdout(message: str):
logging.basicConfig(
- level=logging.DEBUG,
format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
@@ -298,12 +305,13 @@ class ProjectTester:
"""
A component aggregating testing for one project.
"""
- def __init__(self, test_info: TestInfo):
+ def __init__(self, test_info: TestInfo, silent: bool = False):
self.project = test_info.project
self.override_compiler = test_info.override_compiler
self.extra_analyzer_config = test_info.extra_analyzer_config
self.is_reference_build = test_info.is_reference_build
self.strictness = test_info.strictness
+ self.silent = silent
def test(self) -> bool:
"""
@@ -312,20 +320,19 @@ def test(self) -> bool:
to the :param strictness: criteria.
"""
if not self.project.enabled:
- stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
+ self.out(
+ f" \n\n--- Skipping disabled project {self.project.name}\n")
return True
- stdout(f" \n\n--- Building project {self.project.name}\n")
+ self.out(f" \n\n--- Building project {self.project.name}\n")
start_time = time.time()
project_dir = self.get_project_dir()
- if VERBOSE >= 1:
- stdout(f" Build directory: {project_dir}.\n")
+ self.vout(f" Build directory: {project_dir}.\n")
# Set the build results directory.
output_dir = self.get_output_dir()
- output_dir = os.path.join(project_dir, output_dir)
self.build(project_dir, output_dir)
check_build(output_dir)
@@ -336,8 +343,8 @@ def test(self) -> bool:
else:
passed = run_cmp_results(project_dir, self.strictness)
- stdout(f"Completed tests for project {self.project.name} "
- f"(time: {time.time() - start_time:.2f}).\n")
+ self.out(f"Completed tests for project {self.project.name} "
+ f"(time: {time.time() - start_time:.2f}).\n")
return passed
@@ -346,22 +353,23 @@ def get_project_dir(self) -> str:
def get_output_dir(self) -> str:
if self.is_reference_build:
- return REF_PREFIX + OUTPUT_DIR_NAME
+ dirname = REF_PREFIX + OUTPUT_DIR_NAME
else:
- return OUTPUT_DIR_NAME
+ dirname = OUTPUT_DIR_NAME
+
+ return os.path.join(self.get_project_dir(), dirname)
- def build(self, directory: str, output_dir: str):
+ def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
build_log_path = get_build_log_path(output_dir)
- stdout(f"Log file: {build_log_path}\n")
- stdout(f"Output directory: {output_dir}\n")
+ self.out(f"Log file: {build_log_path}\n")
+ self.out(f"Output directory: {output_dir}\n")
remove_log_file(output_dir)
# Clean up scan build results.
if os.path.exists(output_dir):
- if VERBOSE >= 1:
- stdout(f" Removing old results: {output_dir}\n")
+ self.vout(f" Removing old results: {output_dir}\n")
shutil.rmtree(output_dir)
@@ -374,7 +382,7 @@ def build(self, directory: str, output_dir: str):
self._download_and_patch(directory, build_log_file)
run_cleanup_script(directory, build_log_file)
build_time, memory = self.scan_build(directory, output_dir,
- build_log_file)
+ build_log_file)
else:
build_time, memory = self.analyze_preprocessed(directory,
output_dir)
@@ -384,9 +392,11 @@ def build(self, directory: str, output_dir: str):
normalize_reference_results(directory, output_dir,
self.project.mode)
- stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
- f"peak memory: {utils.memory_to_str(memory)}). "
- f"See the log for more details: {build_log_path}\n")
+ self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
+ f"peak memory: {utils.memory_to_str(memory)}). "
+ f"See the log for more details: {build_log_path}\n")
+
+ return build_time, memory
def scan_build(self, directory: str, output_dir: str,
build_log_file: IO) -> Tuple[float, int]:
@@ -454,8 +464,7 @@ def scan_build(self, directory: str, output_dir: str,
command_to_run = command_prefix + command
- if VERBOSE >= 1:
- stdout(f" Executing: {command_to_run}\n")
+ self.vout(f" Executing: {command_to_run}\n")
time, mem = utils.check_and_measure_call(
command_to_run, cwd=cwd,
@@ -522,8 +531,7 @@ def analyze_preprocessed(self, directory: str,
log_path = os.path.join(fail_path, file_name + ".stderr.txt")
with open(log_path, "w+") as log_file:
try:
- if VERBOSE >= 1:
- stdout(f" Executing: {command}\n")
+ self.vout(f" Executing: {command}\n")
time, mem = utils.check_and_measure_call(
command, cwd=directory, stderr=log_file,
@@ -592,8 +600,10 @@ def _download(self, directory: str, build_log_file: IO):
f"for the '{self.project.name}' project")
def _download_from_git(self, directory: str, build_log_file: IO):
+ repo = self.project.origin
cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
- check_call(f"git clone --recursive {self.project.origin} {cached_source}",
+
+ check_call(f"git clone --recursive {repo} {cached_source}",
cwd=directory, stderr=build_log_file,
stdout=build_log_file, shell=True)
check_call(f"git checkout --quiet {self.project.commit}",
@@ -624,16 +634,15 @@ def _run_download_script(directory: str, build_log_file: IO):
out=LOCAL.stdout, err=LOCAL.stderr,
verbose=VERBOSE)
- @staticmethod
- def _apply_patch(directory: str, build_log_file: IO):
+ def _apply_patch(self, directory: str, build_log_file: IO):
patchfile_path = os.path.join(directory, PATCHFILE_NAME)
patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
if not os.path.exists(patchfile_path):
- stdout(" No local patches.\n")
+ self.out(" No local patches.\n")
return
- stdout(" Applying patch.\n")
+ self.out(" Applying patch.\n")
try:
check_call(f"patch -p1 < '{patchfile_path}'",
cwd=patched_source,
@@ -646,6 +655,14 @@ def _apply_patch(directory: str, build_log_file: IO):
f"See {build_log_file.name} for details.\n")
sys.exit(1)
+ def out(self, what: str):
+ if not self.silent:
+ stdout(what)
+
+ def vout(self, what: str):
+ if VERBOSE >= 1:
+ self.out(what)
+
class TestProjectThread(threading.Thread):
def __init__(self, tasks_queue: TestQueue,
@@ -668,10 +685,7 @@ def run(self):
while not self.tasks_queue.empty():
try:
test_info = self.tasks_queue.get()
-
- Logger = logging.getLogger(test_info.project.name)
- LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
- LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
+ init_logger(test_info.project.name)
tester = ProjectTester(test_info)
if not tester.test():
diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py
index 1a2c41d2debf..920fa15e4c6f 100644
--- a/clang/utils/analyzer/SATestUpdateDiffs.py
+++ b/clang/utils/analyzer/SATestUpdateDiffs.py
@@ -21,10 +21,10 @@ def update_reference_results(project: ProjectInfo):
project_dir = tester.get_project_dir()
tester.is_reference_build = True
- ref_results_path = os.path.join(project_dir, tester.get_output_dir())
+ ref_results_path = tester.get_output_dir()
tester.is_reference_build = False
- created_results_path = os.path.join(project_dir, tester.get_output_dir())
+ created_results_path = tester.get_output_dir()
if not os.path.exists(created_results_path):
print("New results not found, was SATestBuild.py previously run?",
diff --git a/clang/utils/analyzer/requirements.txt b/clang/utils/analyzer/requirements.txt
index ec4f66929952..8ae8bc88ac19 100644
--- a/clang/utils/analyzer/requirements.txt
+++ b/clang/utils/analyzer/requirements.txt
@@ -1,4 +1,6 @@
graphviz
humanize
matplotlib
+pandas
psutil
+seaborn