[libcxx-commits] [libcxx] 0acfdbd - [libc++] Improve historical benchmark visualization
Louis Dionne via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Sep 24 10:52:38 PDT 2025
Author: Louis Dionne
Date: 2025-09-24T13:51:26-04:00
New Revision: 0acfdbd7e2b5c66d3611eee8164f8e4f7b6e42f8
URL: https://github.com/llvm/llvm-project/commit/0acfdbd7e2b5c66d3611eee8164f8e4f7b6e42f8
DIFF: https://github.com/llvm/llvm-project/commit/0acfdbd7e2b5c66d3611eee8164f8e4f7b6e42f8.diff
LOG: [libc++] Improve historical benchmark visualization
- Use LOWESS instead of OLS trendlines, since it tends to fit the data better
- Plot using the commit date instead of the arbitrary revlist order
- Fix progress bar reporting when we prefetch Git commit data
- Allow adding a subtitle to charts, which is helpful to stay organized
- Ensure that series are always presented in the same (alphabetical) order
Added:
Modified:
libcxx/utils/requirements.txt
libcxx/utils/visualize-historical
Removed:
################################################################################
diff --git a/libcxx/utils/requirements.txt b/libcxx/utils/requirements.txt
index 7cb5a4b6be446..1ec769c8693dc 100644
--- a/libcxx/utils/requirements.txt
+++ b/libcxx/utils/requirements.txt
@@ -1,3 +1,4 @@
+GitPython
numpy
pandas
plotly
diff --git a/libcxx/utils/visualize-historical b/libcxx/utils/visualize-historical
index f6bec3dee4a15..661a9ba99b163 100755
--- a/libcxx/utils/visualize-historical
+++ b/libcxx/utils/visualize-historical
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
+import datetime
import functools
import os
import pathlib
@@ -10,6 +11,7 @@ import subprocess
import sys
import tempfile
+import git
import pandas
import plotly
import plotly.express
@@ -74,6 +76,14 @@ class Commit:
"""
return subprocess.check_output(['git', '-C', self._git_repo, 'rev-parse', self._sha], text=True).strip()
+ @functools.cached_property
+ def commit_date(self):
+ """
+ Return the date of the commit as a `datetime.datetime` object.
+ """
+ repo = git.Repo(self._git_repo)
+ return datetime.datetime.fromtimestamp(repo.commit(self._sha).committed_date)
+
def prefetch(self):
"""
Prefetch cached properties associated to this commit object.
@@ -81,6 +91,7 @@ class Commit:
This makes it possible to control when time is spent recovering that information from Git for
e.g. better reporting to the user.
"""
+ self.commit_date
self.fullrev
self.shortrev
self.show()
@@ -101,20 +112,21 @@ def truncate_lines(string, n, marker=None):
assert len(truncated) <= n, "broken post-condition"
return '\n'.join(truncated)
-def create_plot(data, metric):
+def create_plot(data, metric, subtitle=None):
"""
Create a plot object showing the evolution of each benchmark throughout the given commits for
the given metric.
"""
- data = data.sort_values(by='revlist_order')
+ data = data.sort_values(by=['date', 'benchmark'])
revlist = pandas.unique(data['commit']) # list of all commits in chronological order
hover_info = {c: truncate_lines(c.show(), 30, marker='...').replace('\n', '<br>') for c in revlist}
figure = plotly.express.scatter(data, title=f"{revlist[0].shortrev} to {revlist[-1].shortrev}",
- x='revlist_order', y=metric,
+ subtitle=subtitle,
+ x='date', y=metric,
symbol='benchmark',
color='benchmark',
hover_name=[hover_info[c] for c in data['commit']],
- trendline="ols")
+ trendline="lowess")
return figure
def directory_path(string):
@@ -184,7 +196,7 @@ def main(argv):
description='Visualize historical data in LNT format. This program generates a HTML file that embeds an '
'interactive plot with the provided data. The HTML file can then be opened in a browser to '
'visualize the data as a chart.',
- epilog='This script depends on the `plotly` and the `tqdm` Python modules.')
+ epilog='This script depends on the modules listed in `libcxx/utils/requirements.txt`.')
parser.add_argument('directory', type=directory_path,
help='Path to a valid directory containing benchmark data in LNT format, each file being named <commit>.lnt. '
'This is also the format generated by the `benchmark-historical` utility.')
@@ -208,6 +220,8 @@ def main(argv):
'floating point number, e.g. 0.25 will detect points that differ by more than 25%% from their previous '
'result. This option respects --filter, i.e. only benchmarks that match the filter will be analyzed for '
'outliers.')
+ parser.add_argument('--subtitle', type=str, required=False,
+ help='Optional subtitle for the chart. This can be used to help identify the contents of the chart.')
parser.add_argument('--git-repo', type=directory_path, default=pathlib.Path(os.getcwd()),
help='Path to the git repository to use for ordering commits in time. '
'By default, the current working directory is used.')
@@ -217,26 +231,27 @@ def main(argv):
args = parser.parse_args(argv)
# Extract benchmark data from the directory.
- data = []
+ data = {}
files = [f for f in args.directory.glob('*.lnt')]
for file in tqdm.tqdm(files, desc='Parsing LNT files'):
+ rows = parse_lnt(file.read_text().splitlines())
(commit, _) = os.path.splitext(os.path.basename(file))
commit = Commit(args.git_repo, commit)
- with open(file, 'r') as f:
- rows = parse_lnt(f.readlines())
- data.extend((commit, row) for row in rows)
+ data[commit] = rows
# Obtain commit information which is then cached throughout the program. Do this
# eagerly so we can provide a progress bar.
- for (commit, _) in tqdm.tqdm(data, desc='Prefetching Git information'):
+ for commit in tqdm.tqdm(data.keys(), desc='Prefetching Git information'):
commit.prefetch()
# Create a dataframe from the raw data and add some columns to it:
# - 'commit' represents the Commit object associated to the results in that row
# - `revlist_order` represents the order of the commit within the Git repository.
- data = pandas.DataFrame([row | {'commit': commit} for (commit, row) in data])
- revlist = sorted_revlist(args.git_repo, [c.fullrev for c in set(data['commit'])])
+ # - `date` represents the commit date
+ revlist = sorted_revlist(args.git_repo, [c.fullrev for c in data.keys()])
+ data = pandas.DataFrame([row | {'commit': c} for (c, rows) in data.items() for row in rows])
data = data.join(pandas.DataFrame([{'revlist_order': revlist.index(c.fullrev)} for c in data['commit']]))
+ data = data.join(pandas.DataFrame([{'date': c.commit_date} for c in data['commit']]))
# Filter the benchmarks if needed.
if args.filter is not None:
@@ -254,7 +269,7 @@ def main(argv):
return
# Plot the data for all the required benchmarks.
- figure = create_plot(data, args.metric)
+ figure = create_plot(data, args.metric, subtitle=args.subtitle)
do_open = args.output is None or args.open
output = args.output if args.output is not None else tempfile.NamedTemporaryFile(suffix='.html').name
plotly.io.write_html(figure, file=output, auto_open=do_open)
More information about the libcxx-commits mailing list