[libcxx-commits] [libcxx] 0acfdbd - [libc++] Improve historical benchmark visualization

Wed Sep 24 10:52:38 PDT 2025

Author: Louis Dionne
Date: 2025-09-24T13:51:26-04:00
New Revision: 0acfdbd7e2b5c66d3611eee8164f8e4f7b6e42f8

URL: https://github.com/llvm/llvm-project/commit/0acfdbd7e2b5c66d3611eee8164f8e4f7b6e42f8
DIFF: https://github.com/llvm/llvm-project/commit/0acfdbd7e2b5c66d3611eee8164f8e4f7b6e42f8.diff

LOG: [libc++] Improve historical benchmark visualization

- Use LOWESS instead of OLS trendlines, since it tends to fit the data better
- Plot using the commit date instead of the arbitrary revlist order
- Fix progress bar reporting when we prefetch Git commit data
- Allow adding a subtitle to charts, which is helpful to stay organized
- Ensure that series are always presented in the same (alphabetical) order

Added: 
    

Modified: 
    libcxx/utils/requirements.txt
    libcxx/utils/visualize-historical

Removed: 
    


################################################################################
diff --git a/libcxx/utils/requirements.txt b/libcxx/utils/requirements.txt
index 7cb5a4b6be446..1ec769c8693dc 100644
--- a/libcxx/utils/requirements.txt
+++ b/libcxx/utils/requirements.txt
@@ -1,3 +1,4 @@
+GitPython
 numpy
 pandas
 plotly

diff --git a/libcxx/utils/visualize-historical b/libcxx/utils/visualize-historical
index f6bec3dee4a15..661a9ba99b163 100755
--- a/libcxx/utils/visualize-historical
+++ b/libcxx/utils/visualize-historical
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import argparse
+import datetime
 import functools
 import os
 import pathlib
@@ -10,6 +11,7 @@ import subprocess
 import sys
 import tempfile
 
+import git
 import pandas
 import plotly
 import plotly.express
@@ -74,6 +76,14 @@ class Commit:
         """
         return subprocess.check_output(['git', '-C', self._git_repo, 'rev-parse', self._sha], text=True).strip()
 
+    @functools.cached_property
+    def commit_date(self):
+        """
+        Return the date of the commit as a `datetime.datetime` object.
+        """
+        repo = git.Repo(self._git_repo)
+        return datetime.datetime.fromtimestamp(repo.commit(self._sha).committed_date)
+
     def prefetch(self):
         """
         Prefetch cached properties associated to this commit object.
@@ -81,6 +91,7 @@ class Commit:
         This makes it possible to control when time is spent recovering that information from Git for
         e.g. better reporting to the user.
         """
+        self.commit_date
         self.fullrev
         self.shortrev
         self.show()
@@ -101,20 +112,21 @@ def truncate_lines(string, n, marker=None):
     assert len(truncated) <= n, "broken post-condition"
     return '\n'.join(truncated)
 
-def create_plot(data, metric):
+def create_plot(data, metric, subtitle=None):
     """
     Create a plot object showing the evolution of each benchmark throughout the given commits for
     the given metric.
     """
-    data = data.sort_values(by='revlist_order')
+    data = data.sort_values(by=['date', 'benchmark'])
     revlist = pandas.unique(data['commit']) # list of all commits in chronological order
     hover_info = {c: truncate_lines(c.show(), 30, marker='...').replace('\n', '<br>') for c in revlist}
     figure = plotly.express.scatter(data, title=f"{revlist[0].shortrev} to {revlist[-1].shortrev}",
-                                          x='revlist_order', y=metric,
+                                          subtitle=subtitle,
+                                          x='date', y=metric,
                                           symbol='benchmark',
                                           color='benchmark',
                                           hover_name=[hover_info[c] for c in data['commit']],
-                                          trendline="ols")
+                                          trendline="lowess")
     return figure
 
 def directory_path(string):
@@ -184,7 +196,7 @@ def main(argv):
         description='Visualize historical data in LNT format. This program generates a HTML file that embeds an '
                     'interactive plot with the provided data. The HTML file can then be opened in a browser to '
                     'visualize the data as a chart.',
-        epilog='This script depends on the `plotly` and the `tqdm` Python modules.')
+        epilog='This script depends on the modules listed in `libcxx/utils/requirements.txt`.')
     parser.add_argument('directory', type=directory_path,
         help='Path to a valid directory containing benchmark data in LNT format, each file being named <commit>.lnt. '
              'This is also the format generated by the `benchmark-historical` utility.')
@@ -208,6 +220,8 @@ def main(argv):
              'floating point number, e.g. 0.25 will detect points that differ by more than 25%% from their previous '
              'result. This option respects --filter, i.e. only benchmarks that match the filter will be analyzed for '
              'outliers.')
+    parser.add_argument('--subtitle', type=str, required=False,
+        help='Optional subtitle for the chart. This can be used to help identify the contents of the chart.')
     parser.add_argument('--git-repo', type=directory_path, default=pathlib.Path(os.getcwd()),
         help='Path to the git repository to use for ordering commits in time. '
              'By default, the current working directory is used.')
@@ -217,26 +231,27 @@ def main(argv):
     args = parser.parse_args(argv)
 
     # Extract benchmark data from the directory.
-    data = []
+    data = {}
     files = [f for f in args.directory.glob('*.lnt')]
     for file in tqdm.tqdm(files, desc='Parsing LNT files'):
+        rows = parse_lnt(file.read_text().splitlines())
         (commit, _) = os.path.splitext(os.path.basename(file))
         commit = Commit(args.git_repo, commit)
-        with open(file, 'r') as f:
-            rows = parse_lnt(f.readlines())
-        data.extend((commit, row) for row in rows)
+        data[commit] = rows
 
     # Obtain commit information which is then cached throughout the program. Do this
     # eagerly so we can provide a progress bar.
-    for (commit, _) in tqdm.tqdm(data, desc='Prefetching Git information'):
+    for commit in tqdm.tqdm(data.keys(), desc='Prefetching Git information'):
         commit.prefetch()
 
     # Create a dataframe from the raw data and add some columns to it:
     # - 'commit' represents the Commit object associated to the results in that row
     # - `revlist_order` represents the order of the commit within the Git repository.
-    data = pandas.DataFrame([row | {'commit': commit} for (commit, row) in data])
-    revlist = sorted_revlist(args.git_repo, [c.fullrev for c in set(data['commit'])])
+    # - `date` represents the commit date
+    revlist = sorted_revlist(args.git_repo, [c.fullrev for c in data.keys()])
+    data = pandas.DataFrame([row | {'commit': c} for (c, rows) in data.items() for row in rows])
     data = data.join(pandas.DataFrame([{'revlist_order': revlist.index(c.fullrev)} for c in data['commit']]))
+    data = data.join(pandas.DataFrame([{'date': c.commit_date} for c in data['commit']]))
 
     # Filter the benchmarks if needed.
     if args.filter is not None:
@@ -254,7 +269,7 @@ def main(argv):
         return
 
     # Plot the data for all the required benchmarks.
-    figure = create_plot(data, args.metric)
+    figure = create_plot(data, args.metric, subtitle=args.subtitle)
     do_open = args.output is None or args.open
     output = args.output if args.output is not None else tempfile.NamedTemporaryFile(suffix='.html').name
     plotly.io.write_html(figure, file=output, auto_open=do_open)