[llvm-branch-commits] [clang] [LifetimeSafety] Add script performance benchmarking (PR #147315)
Utkarsh Saxena via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jul 7 08:40:09 PDT 2025
https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/147315
From 0fbfd74d23b6cd26ef0480f7b9061b2f4a745338 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Mon, 7 Jul 2025 15:13:00 +0000
Subject: [PATCH 1/2] [LifetimeSafety] Add script performance benchmarking
---
clang/lib/Analysis/LifetimeSafety.cpp | 7 +-
.../Analysis/lifetime_safety/benchmark.py | 215 ++++++++++++++++++
2 files changed, 221 insertions(+), 1 deletion(-)
create mode 100644 clang/test/Analysis/lifetime_safety/benchmark.py
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index e881e592ef59f..1c83b5051bad1 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -151,7 +151,12 @@ class OriginManager {
OriginID get(const ValueDecl &D) {
auto It = DeclToOriginID.find(&D);
- assert(It != DeclToOriginID.end());
+    // TODO: This should be an assert(It != DeclToOriginID.end()). The current
+    // implementation falls back to getOrCreate to avoid crashing on
+    // yet-unhandled pointer expressions, creating an empty origin for them.
+ if (It == DeclToOriginID.end())
+ return getOrCreate(D);
+
return It->second;
}
diff --git a/clang/test/Analysis/lifetime_safety/benchmark.py b/clang/test/Analysis/lifetime_safety/benchmark.py
new file mode 100644
index 0000000000000..ddf32e192de17
--- /dev/null
+++ b/clang/test/Analysis/lifetime_safety/benchmark.py
@@ -0,0 +1,215 @@
+import sys
+import argparse
+import subprocess
+import tempfile
+import json
+import os
+from datetime import datetime
+import numpy as np
+from scipy.optimize import curve_fit
+from scipy.stats import t
+
+def generate_cpp_cycle_test(n: int) -> str:
+ """
+ Generates a C++ code snippet with a specified number of pointers in a cycle.
+ """
+ if n <= 0:
+ return "// Number of variables must be positive."
+
+ cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+ cpp_code += f"void long_cycle_{n}(bool condition) {{\n"
+ for i in range(1, n + 1):
+ cpp_code += f" MyObj v{i}{{1}};\n"
+ cpp_code += "\n"
+ for i in range(1, n + 1):
+ cpp_code += f" MyObj* p{i} = &v{i};\n"
+
+ cpp_code += "\n while (condition) {\n"
+ if n > 0:
+ cpp_code += f" MyObj* temp = p1;\n"
+ for i in range(1, n):
+ cpp_code += f" p{i} = p{i+1};\n"
+ cpp_code += f" p{n} = temp;\n"
+ cpp_code += " }\n}\n"
+ cpp_code += f"\nint main() {{ long_cycle_{n}(false); return 0; }}\n"
+ return cpp_code
+
+def generate_cpp_merge_test(n: int) -> str:
+ """
+ Generates a C++ code snippet with N independent conditional assignments.
+ """
+ if n <= 0:
+ return "// Number of variables must be positive."
+
+ cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+ cpp_code += f"void conditional_merges_{n}(bool condition) {{\n"
+ decls = [f"v{i}" for i in range(1, n + 1)]
+ cpp_code += f" MyObj {', '.join(decls)};\n"
+ ptr_decls = [f"*p{i} = nullptr" for i in range(1, n + 1)]
+ cpp_code += f" MyObj {', '.join(ptr_decls)};\n\n"
+
+ for i in range(1, n + 1):
+ cpp_code += f" if(condition) {{ p{i} = &v{i}; }}\n"
+
+ cpp_code += "}\n"
+ cpp_code += f"\nint main() {{ conditional_merges_{n}(false); return 0; }}\n"
+ return cpp_code
+
+def analyze_trace_file(trace_path: str) -> tuple[float, float]:
+ """
+ Parses the -ftime-trace JSON output to find durations.
+
+ Returns:
+ A tuple of (lifetime_analysis_duration_us, total_clang_duration_us).
+ """
+ lifetime_duration = 0.0
+ total_duration = 0.0
+ try:
+ with open(trace_path, 'r') as f:
+ trace_data = json.load(f)
+ for event in trace_data.get('traceEvents', []):
+ if event.get('name') == 'LifetimeAnalysis':
+ lifetime_duration += float(event.get('dur', 0))
+ if event.get('name') == 'ExecuteCompiler':
+ total_duration += float(event.get('dur', 0))
+
+ except (IOError, json.JSONDecodeError) as e:
+ print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr)
+ return 0.0, 0.0
+ return lifetime_duration, total_duration
+
+def power_law(n, c, k):
+ """Represents the power law function: y = c * n^k"""
+ return c * np.power(n, k)
+
+def human_readable_time(ms: float) -> str:
+ """Converts milliseconds to a human-readable string (ms or s)."""
+ if ms >= 1000:
+ return f"{ms / 1000:.2f} s"
+ return f"{ms:.2f} ms"
+
+def generate_markdown_report(results: dict) -> str:
+ """Generates a Markdown-formatted report from the benchmark results."""
+ report = []
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z")
+ report.append(f"# Lifetime Analysis Performance Report")
+ report.append(f"> Generated on: {timestamp}")
+ report.append("\n---\n")
+
+ for test_type, data in results.items():
+ title = 'Pointer Cycle in Loop' if test_type == 'cycle' else 'CFG Merges'
+ report.append(f"## Test Case: {title}")
+ report.append("")
+
+ # Table header
+ report.append("| N | Analysis Time | Total Clang Time |")
+ report.append("|:----|--------------:|-----------------:|")
+
+ # Table rows
+ n_data = np.array(data['n'])
+ analysis_data = np.array(data['lifetime_ms'])
+ total_data = np.array(data['total_ms'])
+ for i in range(len(n_data)):
+ analysis_str = human_readable_time(analysis_data[i])
+ total_str = human_readable_time(total_data[i])
+ report.append(f"| {n_data[i]:<3} | {analysis_str:>13} | {total_str:>16} |")
+
+ report.append("")
+
+ # Complexity analysis
+ report.append(f"**Complexity Analysis:**")
+ try:
+ popt, pcov = curve_fit(power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000)
+ _, k = popt
+
+ # R-squared calculation
+ residuals = analysis_data - power_law(n_data, *popt)
+ ss_res = np.sum(residuals**2)
+ ss_tot = np.sum((analysis_data - np.mean(analysis_data))**2)
+ r_squared = 1 - (ss_res / ss_tot)
+
+ # Confidence Interval for k
+ alpha = 0.05 # 95% confidence
+ dof = max(0, len(n_data) - len(popt)) # degrees of freedom
+ t_val = t.ppf(1.0 - alpha / 2., dof)
+ # Standard error of the parameters
+ perr = np.sqrt(np.diag(pcov))
+ k_stderr = perr[1]
+ k_ci_lower = k - t_val * k_stderr
+ k_ci_upper = k + t_val * k_stderr
+
+ report.append(f"- The performance of the analysis for this case scales approximately as **O(n<sup>{k:.2f}</sup>)**.")
+ report.append(f"- **Goodness of Fit (R²):** `{r_squared:.4f}` (closer to 1.0 is better).")
+ report.append(f"- **95% Confidence Interval for exponent 'k':** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`.")
+
+ except RuntimeError:
+ report.append("- Could not determine a best-fit curve for the data.")
+
+ report.append("\n---\n")
+
+ return "\n".join(report)
+
+def run_single_test(clang_binary: str, test_type: str, n: int) -> tuple[float, float]:
+ """Generates, compiles, and benchmarks a single test case."""
+ print(f"--- Running Test: {test_type.capitalize()} with N={n} ---")
+
+ generated_code = ""
+ if test_type == 'cycle':
+ generated_code = generate_cpp_cycle_test(n)
+ else: # merge
+ generated_code = generate_cpp_merge_test(n)
+
+ with tempfile.NamedTemporaryFile(mode='w+', suffix='.cpp', delete=False) as tmp_cpp:
+ tmp_cpp.write(generated_code)
+ source_file = tmp_cpp.name
+
+ trace_file = os.path.splitext(source_file)[0] + '.json'
+
+ clang_command = [
+ clang_binary, '-c', '-o', '/dev/null', '-ftime-trace=' + trace_file,
+ '-Wexperimental-lifetime-safety', '-std=c++17', source_file
+ ]
+
+ result = subprocess.run(clang_command, capture_output=True, text=True)
+
+ if result.returncode != 0:
+ print(f"Compilation failed for N={n}!", file=sys.stderr)
+ print(result.stderr, file=sys.stderr)
+ os.remove(source_file)
+ return 0.0, 0.0
+
+ lifetime_us, total_us = analyze_trace_file(trace_file)
+ os.remove(source_file)
+ os.remove(trace_file)
+
+ return lifetime_us / 1000.0, total_us / 1000.0
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis.")
+ parser.add_argument("--clang-binary", type=str, required=True, help="Path to the Clang executable.")
+
+ args = parser.parse_args()
+
+ n_values = [10, 25, 50, 75, 100, 150, 200]
+ results = {
+ 'cycle': {'n': [], 'lifetime_ms': [], 'total_ms': []},
+ 'merge': {'n': [], 'lifetime_ms': [], 'total_ms': []}
+ }
+
+ print("Running performance benchmarks...")
+ for test_type in ['cycle', 'merge']:
+ for n in n_values:
+ lifetime_ms, total_ms = run_single_test(args.clang_binary, test_type, n)
+ if total_ms > 0:
+ results[test_type]['n'].append(n)
+ results[test_type]['lifetime_ms'].append(lifetime_ms)
+ results[test_type]['total_ms'].append(total_ms)
+ print(f" Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}")
+
+ print("\n\n" + "="*80)
+ print("Generating Markdown Report...")
+ print("="*80 + "\n")
+
+ markdown_report = generate_markdown_report(results)
+ print(markdown_report)
+
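For reference, the script is driven entirely by the required --clang-binary flag and needs numpy and scipy available; a typical invocation from a build tree looks like the following (the build/bin path is an assumption, substitute the location of your own clang):

    python3 clang/test/Analysis/lifetime_safety/benchmark.py \
        --clang-binary build/bin/clang

It generates the cycle and merge test cases for each N, compiles them with -ftime-trace, and prints the Markdown report to stdout.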
From b09513a7e91143450bf079badc54d19b3a873f95 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Mon, 7 Jul 2025 15:39:55 +0000
Subject: [PATCH 2/2] format
---
.../Analysis/lifetime_safety/benchmark.py | 117 +++++++++++-------
1 file changed, 72 insertions(+), 45 deletions(-)
diff --git a/clang/test/Analysis/lifetime_safety/benchmark.py b/clang/test/Analysis/lifetime_safety/benchmark.py
index ddf32e192de17..259be915cd043 100644
--- a/clang/test/Analysis/lifetime_safety/benchmark.py
+++ b/clang/test/Analysis/lifetime_safety/benchmark.py
@@ -9,6 +9,7 @@
from scipy.optimize import curve_fit
from scipy.stats import t
+
def generate_cpp_cycle_test(n: int) -> str:
"""
Generates a C++ code snippet with a specified number of pointers in a cycle.
@@ -34,6 +35,7 @@ def generate_cpp_cycle_test(n: int) -> str:
cpp_code += f"\nint main() {{ long_cycle_{n}(false); return 0; }}\n"
return cpp_code
+
def generate_cpp_merge_test(n: int) -> str:
"""
Generates a C++ code snippet with N independent conditional assignments.
@@ -55,6 +57,7 @@ def generate_cpp_merge_test(n: int) -> str:
cpp_code += f"\nint main() {{ conditional_merges_{n}(false); return 0; }}\n"
return cpp_code
+
def analyze_trace_file(trace_path: str) -> tuple[float, float]:
"""
Parses the -ftime-trace JSON output to find durations.
@@ -65,29 +68,32 @@ def analyze_trace_file(trace_path: str) -> tuple[float, float]:
lifetime_duration = 0.0
total_duration = 0.0
try:
- with open(trace_path, 'r') as f:
+ with open(trace_path, "r") as f:
trace_data = json.load(f)
- for event in trace_data.get('traceEvents', []):
- if event.get('name') == 'LifetimeAnalysis':
- lifetime_duration += float(event.get('dur', 0))
- if event.get('name') == 'ExecuteCompiler':
- total_duration += float(event.get('dur', 0))
+ for event in trace_data.get("traceEvents", []):
+ if event.get("name") == "LifetimeAnalysis":
+ lifetime_duration += float(event.get("dur", 0))
+ if event.get("name") == "ExecuteCompiler":
+ total_duration += float(event.get("dur", 0))
except (IOError, json.JSONDecodeError) as e:
print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr)
return 0.0, 0.0
return lifetime_duration, total_duration
+
def power_law(n, c, k):
"""Represents the power law function: y = c * n^k"""
return c * np.power(n, k)
+
def human_readable_time(ms: float) -> str:
"""Converts milliseconds to a human-readable string (ms or s)."""
if ms >= 1000:
return f"{ms / 1000:.2f} s"
return f"{ms:.2f} ms"
+
def generate_markdown_report(results: dict) -> str:
"""Generates a Markdown-formatted report from the benchmark results."""
report = []
@@ -97,7 +103,7 @@ def generate_markdown_report(results: dict) -> str:
report.append("\n---\n")
for test_type, data in results.items():
- title = 'Pointer Cycle in Loop' if test_type == 'cycle' else 'CFG Merges'
+ title = "Pointer Cycle in Loop" if test_type == "cycle" else "CFG Merges"
report.append(f"## Test Case: {title}")
report.append("")
@@ -106,9 +112,9 @@ def generate_markdown_report(results: dict) -> str:
report.append("|:----|--------------:|-----------------:|")
# Table rows
- n_data = np.array(data['n'])
- analysis_data = np.array(data['lifetime_ms'])
- total_data = np.array(data['total_ms'])
+ n_data = np.array(data["n"])
+ analysis_data = np.array(data["lifetime_ms"])
+ total_data = np.array(data["total_ms"])
for i in range(len(n_data)):
analysis_str = human_readable_time(analysis_data[i])
total_str = human_readable_time(total_data[i])
@@ -119,28 +125,36 @@ def generate_markdown_report(results: dict) -> str:
# Complexity analysis
report.append(f"**Complexity Analysis:**")
try:
- popt, pcov = curve_fit(power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000)
+ popt, pcov = curve_fit(
+ power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000
+ )
_, k = popt
-
+
# R-squared calculation
residuals = analysis_data - power_law(n_data, *popt)
ss_res = np.sum(residuals**2)
- ss_tot = np.sum((analysis_data - np.mean(analysis_data))**2)
+ ss_tot = np.sum((analysis_data - np.mean(analysis_data)) ** 2)
r_squared = 1 - (ss_res / ss_tot)
-
+
# Confidence Interval for k
alpha = 0.05 # 95% confidence
- dof = max(0, len(n_data) - len(popt)) # degrees of freedom
- t_val = t.ppf(1.0 - alpha / 2., dof)
+ dof = max(0, len(n_data) - len(popt)) # degrees of freedom
+ t_val = t.ppf(1.0 - alpha / 2.0, dof)
# Standard error of the parameters
perr = np.sqrt(np.diag(pcov))
k_stderr = perr[1]
k_ci_lower = k - t_val * k_stderr
k_ci_upper = k + t_val * k_stderr
- report.append(f"- The performance of the analysis for this case scales approximately as **O(n<sup>{k:.2f}</sup>)**.")
- report.append(f"- **Goodness of Fit (R²):** `{r_squared:.4f}` (closer to 1.0 is better).")
- report.append(f"- **95% Confidence Interval for exponent 'k':** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`.")
+ report.append(
+ f"- The performance of the analysis for this case scales approximately as **O(n<sup>{k:.2f}</sup>)**."
+ )
+ report.append(
+ f"- **Goodness of Fit (R²):** `{r_squared:.4f}` (closer to 1.0 is better)."
+ )
+ report.append(
+ f"- **95% Confidence Interval for exponent 'k':** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`."
+ )
except RuntimeError:
report.append("- Could not determine a best-fit curve for the data.")
@@ -149,67 +163,80 @@ def generate_markdown_report(results: dict) -> str:
return "\n".join(report)
+
def run_single_test(clang_binary: str, test_type: str, n: int) -> tuple[float, float]:
"""Generates, compiles, and benchmarks a single test case."""
print(f"--- Running Test: {test_type.capitalize()} with N={n} ---")
-
+
generated_code = ""
- if test_type == 'cycle':
+ if test_type == "cycle":
generated_code = generate_cpp_cycle_test(n)
- else: # merge
+ else: # merge
generated_code = generate_cpp_merge_test(n)
- with tempfile.NamedTemporaryFile(mode='w+', suffix='.cpp', delete=False) as tmp_cpp:
+ with tempfile.NamedTemporaryFile(mode="w+", suffix=".cpp", delete=False) as tmp_cpp:
tmp_cpp.write(generated_code)
source_file = tmp_cpp.name
-
- trace_file = os.path.splitext(source_file)[0] + '.json'
+
+ trace_file = os.path.splitext(source_file)[0] + ".json"
clang_command = [
- clang_binary, '-c', '-o', '/dev/null', '-ftime-trace=' + trace_file,
- '-Wexperimental-lifetime-safety', '-std=c++17', source_file
+ clang_binary,
+ "-c",
+ "-o",
+ "/dev/null",
+ "-ftime-trace=" + trace_file,
+ "-Wexperimental-lifetime-safety",
+ "-std=c++17",
+ source_file,
]
result = subprocess.run(clang_command, capture_output=True, text=True)
-
+
if result.returncode != 0:
print(f"Compilation failed for N={n}!", file=sys.stderr)
print(result.stderr, file=sys.stderr)
os.remove(source_file)
return 0.0, 0.0
-
+
lifetime_us, total_us = analyze_trace_file(trace_file)
os.remove(source_file)
os.remove(trace_file)
-
+
return lifetime_us / 1000.0, total_us / 1000.0
+
if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis.")
- parser.add_argument("--clang-binary", type=str, required=True, help="Path to the Clang executable.")
-
+ parser = argparse.ArgumentParser(
+ description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis."
+ )
+ parser.add_argument(
+ "--clang-binary", type=str, required=True, help="Path to the Clang executable."
+ )
+
args = parser.parse_args()
n_values = [10, 25, 50, 75, 100, 150, 200]
results = {
- 'cycle': {'n': [], 'lifetime_ms': [], 'total_ms': []},
- 'merge': {'n': [], 'lifetime_ms': [], 'total_ms': []}
+ "cycle": {"n": [], "lifetime_ms": [], "total_ms": []},
+ "merge": {"n": [], "lifetime_ms": [], "total_ms": []},
}
print("Running performance benchmarks...")
- for test_type in ['cycle', 'merge']:
+ for test_type in ["cycle", "merge"]:
for n in n_values:
lifetime_ms, total_ms = run_single_test(args.clang_binary, test_type, n)
if total_ms > 0:
- results[test_type]['n'].append(n)
- results[test_type]['lifetime_ms'].append(lifetime_ms)
- results[test_type]['total_ms'].append(total_ms)
- print(f" Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}")
-
- print("\n\n" + "="*80)
+ results[test_type]["n"].append(n)
+ results[test_type]["lifetime_ms"].append(lifetime_ms)
+ results[test_type]["total_ms"].append(total_ms)
+ print(
+ f" Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}"
+ )
+
+ print("\n\n" + "=" * 80)
print("Generating Markdown Report...")
- print("="*80 + "\n")
-
+ print("=" * 80 + "\n")
+
markdown_report = generate_markdown_report(results)
print(markdown_report)
-
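As an aside for reviewers, the complexity line in the report comes from fitting a power law to the measured analysis times and attaching a t-based 95% confidence interval to the exponent. A minimal standalone sketch of that computation follows (illustrative data and starting guess only, not part of the patch):

    import numpy as np
    from scipy.optimize import curve_fit
    from scipy.stats import t

    def power_law(n, c, k):
        return c * np.power(n, k)

    # Illustrative timings, roughly quadratic in N (fake "analysis_ms" data).
    n = np.array([10, 25, 50, 75, 100, 150, 200], dtype=float)
    y = 0.01 * n**2 * (1 + 0.05 * np.sin(n))

    popt, pcov = curve_fit(power_law, n, y, p0=[1, 2], maxfev=5000)
    _, k = popt
    k_stderr = np.sqrt(np.diag(pcov))[1]   # standard error of the exponent
    dof = max(0, len(n) - len(popt))       # degrees of freedom
    t_val = t.ppf(0.975, dof)              # two-sided 95% quantile
    print(f"k = {k:.2f}, 95% CI = [{k - t_val * k_stderr:.2f}, "
          f"{k + t_val * k_stderr:.2f}]")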