[llvm-branch-commits] [clang] [LifetimeSafety] Enhance benchmark script for new sub analyses (PR #149577)
Utkarsh Saxena via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jul 19 08:42:13 PDT 2025
https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/149577
From 6cb36540be595794914faa992e59be07e7cede9a Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Sat, 19 Jul 2025 15:40:36 +0000
Subject: [PATCH] add live origins
---
.../test/Analysis/LifetimeSafety/benchmark.py | 230 +++++++++++++-----
1 file changed, 165 insertions(+), 65 deletions(-)
diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py
index 9d5f36c51b9ee..1ab89d107a238 100644
--- a/clang/test/Analysis/LifetimeSafety/benchmark.py
+++ b/clang/test/Analysis/LifetimeSafety/benchmark.py
@@ -99,28 +99,86 @@ def generate_cpp_merge_test(n: int) -> str:
return cpp_code
-def analyze_trace_file(trace_path: str) -> tuple[float, float]:
+def generate_cpp_nested_loop_test(n: int) -> str:
"""
- Parses the -ftime-trace JSON output to find durations.
+ Generates C++ code with N levels of nested loops.
+ This pattern tests how analysis performance scales with loop nesting depth,
+ which is a key factor in the complexity of dataflow analyses on structured
+ control flow.
- Returns:
- A tuple of (lifetime_analysis_duration_us, total_clang_duration_us).
+ Example (n=3):
+ struct MyObj { int id; ~MyObj() {} };
+ void nested_loops_3() {
+ MyObj* p = nullptr;
+ for(int i0=0; i0<2; ++i0) {
+ MyObj s0;
+ p = &s0;
+ for(int i1=0; i1<2; ++i1) {
+ MyObj s1;
+ p = &s1;
+ for(int i2=0; i2<2; ++i2) {
+ MyObj s2;
+ p = &s2;
+ }
+ }
+ }
+ }
+ """
+ if n <= 0:
+ return "// Nesting depth must be positive."
+
+ cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+ cpp_code += f"void nested_loops_{n}() {{\n"
+ cpp_code += " MyObj* p = nullptr;\n"
+
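+ # Each nesting level declares a fresh local and re-points p at it, so every
+ # level of the loop nest creates a loan that later expires (intended to
+ # stress how the dataflow passes handle loop back-edges).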
+ for i in range(n):
+ indent = " " * (i + 1)
+ cpp_code += f"{indent}for(int i{i}=0; i{i}<2; ++i{i}) {{\n"
+ cpp_code += f"{indent} MyObj s{i}; p = &s{i};\n"
+
+ for i in range(n - 1, -1, -1):
+ indent = " " * (i + 1)
+ cpp_code += f"{indent}}}\n"
+
+ cpp_code += "}\n"
+ cpp_code += f"\nint main() {{ nested_loops_{n}(); return 0; }}\n"
+ return cpp_code
+
+
+def analyze_trace_file(trace_path: str) -> dict:
"""
- lifetime_duration = 0.0
- total_duration = 0.0
+ Parses the -ftime-trace JSON output to find durations for the lifetime
+ analysis and its sub-phases.
+ Returns a dictionary of durations in microseconds.
+ """
+ durations = {
+ "lifetime_us": 0.0,
+ "total_us": 0.0,
+ "fact_gen_us": 0.0,
+ "loan_prop_us": 0.0,
+ "expired_loans_us": 0.0,
+ "live_origins_us": 0.0,
+ }
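+ # Event names are assumed to match the time-trace scope labels emitted by
+ # clang for the analysis (e.g. via llvm::TimeTraceScope); unmapped names are
+ # simply ignored.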
+ event_name_map = {
+ "LifetimeSafetyAnalysis": "lifetime_us",
+ "ExecuteCompiler": "total_us",
+ "FactGenerator": "fact_gen_us",
+ "LoanPropagation": "loan_prop_us",
+ "ExpiredLoans": "expired_loans_us",
+ "LiveOrigins": "live_origins_us",
+ }
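+ # Assumed shape of a -ftime-trace entry (Chrome Trace Event format):
+ #   {"name": "LoanPropagation", "ph": "X", "ts": ..., "dur": 1234}
+ # where "dur" is wall time in microseconds; repeated events are summed.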
try:
with open(trace_path, "r") as f:
trace_data = json.load(f)
for event in trace_data.get("traceEvents", []):
- if event.get("name") == "LifetimeSafetyAnalysis":
- lifetime_duration += float(event.get("dur", 0))
- if event.get("name") == "ExecuteCompiler":
- total_duration += float(event.get("dur", 0))
-
+ event_name = event.get("name")
+ if event_name in event_name_map:
+ key = event_name_map[event_name]
+ durations[key] += float(event.get("dur", 0))
except (IOError, json.JSONDecodeError) as e:
print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr)
- return 0.0, 0.0
- return lifetime_duration, total_duration
+ return {key: 0.0 for key in durations}
+ return durations
def power_law(n, c, k):
@@ -135,8 +193,29 @@ def human_readable_time(ms: float) -> str:
return f"{ms:.2f} ms"
+def calculate_complexity(n_data, y_data) -> tuple[float | None, float | None]:
+ """
+ Calculates the exponent 'k' for the power law fit y = c * n^k.
+ Returns a tuple of (k, k_standard_error).
+ """
+ try:
+ if len(n_data) < 3 or np.all(y_data < 1e-6) or np.var(y_data) < 1e-6:
+ return None, None
+
+ non_zero_indices = y_data > 0
+ if np.sum(non_zero_indices) < 3:
+ return None, None
+
+ n_fit, y_fit = n_data[non_zero_indices], y_data[non_zero_indices]
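+ # Fit y = c * n^k (a straight line of slope k on log-log axes). pcov is
+ # the covariance matrix of (c, k), so the square root of its diagonal
+ # gives the one-sigma standard errors; index 1 is the error on k.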
+ popt, pcov = curve_fit(power_law, n_fit, y_fit, p0=[0, 1], maxfev=5000)
+ k_stderr = np.sqrt(np.diag(pcov))[1]
+ return popt[1], k_stderr
+ except (RuntimeError, ValueError):
+ return None, None
+
+
def generate_markdown_report(results: dict) -> str:
- """Generates a Markdown-formatted report from the benchmark results."""
+ """Generates a concise, Markdown-formatted report from the benchmark results."""
report = []
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z")
report.append(f"# Lifetime Analysis Performance Report")
@@ -146,54 +225,54 @@ def generate_markdown_report(results: dict) -> str:
for test_name, data in results.items():
title = data["title"]
report.append(f"## Test Case: {title}")
- report.append("")
+ report.append("\n**Timing Results:**\n")
# Table header
- report.append("| N | Analysis Time | Total Clang Time |")
- report.append("|:----|--------------:|-----------------:|")
+ report.append(
+ "| N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) | Live Origins (%) |"
+ )
+ report.append(
+ "|:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:|------------------:|"
+ )
# Table rows
n_data = np.array(data["n"])
- analysis_data = np.array(data["lifetime_ms"])
- total_data = np.array(data["total_ms"])
+ total_ms_data = np.array(data["total_ms"])
for i in range(len(n_data)):
- analysis_str = human_readable_time(analysis_data[i])
- total_str = human_readable_time(total_data[i])
- report.append(f"| {n_data[i]:<3} | {analysis_str:>13} | {total_str:>16} |")
-
- report.append("")
-
- # Complexity analysis
- report.append(f"**Complexity Analysis:**")
- try:
- # Curve fitting requires at least 3 points
- if len(n_data) < 3:
- raise ValueError("Not enough data points to perform curve fitting.")
-
- popt, pcov = curve_fit(
- power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000
- )
- _, k = popt
-
- # Confidence Interval for k
- alpha = 0.05 # 95% confidence
- dof = max(0, len(n_data) - len(popt)) # degrees of freedom
- t_val = t.ppf(1.0 - alpha / 2.0, dof)
- # Standard error of the parameters
- perr = np.sqrt(np.diag(pcov))
- k_stderr = perr[1]
- k_ci_lower = k - t_val * k_stderr
- k_ci_upper = k + t_val * k_stderr
-
- report.append(
- f"- The performance for this case scales approx. as **O(n<sup>{k:.2f}</sup>)**."
- )
- report.append(
- f"- **95% Confidence interval for exponent:** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`."
- )
+ total_t = total_ms_data[i]
+ if total_t < 1e-6:
+ total_t = 1.0 # Avoid division by zero
+
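+ # Every percentage column is a share of the total clang wall time (the
+ # "ExecuteCompiler" event), not of the lifetime analysis time.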
+ row = [
+ f"| {n_data[i]:<14} |",
+ f"{human_readable_time(total_t):>10} |",
+ f"{data['lifetime_ms'][i] / total_t * 100:>17.2f}% |",
+ f"{data['fact_gen_ms'][i] / total_t * 100:>18.2f}% |",
+ f"{data['loan_prop_ms'][i] / total_t * 100:>20.2f}% |",
+ f"{data['expired_loans_ms'][i] / total_t * 100:>17.2f}% |",
+ f"{data['live_origins_ms'][i] / total_t * 100:>17.2f}% |",
+ ]
+ report.append(" ".join(row))
+
+ report.append("\n**Complexity Analysis:**\n")
+ report.append("| Analysis Phase | Complexity O(n<sup>k</sup>) |")
+ report.append("|:------------------|:--------------------------|")
+
+ analysis_phases = {
+ "Total Analysis": data["lifetime_ms"],
+ "FactGenerator": data["fact_gen_ms"],
+ "LoanPropagation": data["loan_prop_ms"],
+ "ExpiredLoans": data["expired_loans_ms"],
+ "LiveOrigins": data["live_origins_ms"],
+ }
- except (RuntimeError, ValueError) as e:
- report.append(f"- Could not determine a best-fit curve for the data: {e}")
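+ # Phases whose timings are essentially zero or constant yield no meaningful
+ # power-law fit and are reported as negligible; for the rest, the ± value
+ # is the standard error of the fitted exponent k.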
+ for phase_name, y_data in analysis_phases.items():
+ k, delta = calculate_complexity(n_data, np.array(y_data))
+ if k is not None and delta is not None:
+ complexity_str = f"O(n<sup>{k:.2f}</sup> ± {delta:.2f})"
+ else:
+ complexity_str = "(Negligible)"
+ report.append(f"| {phase_name:<17} | {complexity_str:<25} |")
report.append("\n---\n")
@@ -202,7 +281,7 @@ def generate_markdown_report(results: dict) -> str:
def run_single_test(
clang_binary: str, output_dir: str, test_name: str, generator_func, n: int
-) -> tuple[float, float]:
+) -> dict:
"""Generates, compiles, and benchmarks a single test case."""
print(f"--- Running Test: {test_name.capitalize()} with N={n} ---")
@@ -231,11 +310,12 @@ def run_single_test(
if result.returncode != 0:
print(f"Compilation failed for N={n}!", file=sys.stderr)
print(result.stderr, file=sys.stderr)
- return 0.0, 0.0
+ return {}
- lifetime_us, total_us = analyze_trace_file(trace_file)
-
- return lifetime_us / 1000.0, total_us / 1000.0
+ durations_us = analyze_trace_file(trace_file)
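+ # analyze_trace_file reports microseconds; convert to milliseconds and
+ # rename the keys (_us -> _ms) so the rest of the script is unit-consistent.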
+ return {
+ key.replace("_us", "_ms"): value / 1000.0 for key, value in durations_us.items()
+ }
if __name__ == "__main__":
@@ -270,6 +350,12 @@ def run_single_test(
"generator_func": generate_cpp_merge_test,
"n_values": [10, 50, 100, 200, 400, 800],
},
+ {
+ "name": "nested_loops",
+ "title": "Deeply Nested Loops",
+ "generator_func": generate_cpp_nested_loop_test,
+ "n_values": [10, 50, 100, 200, 400, 800],
+ },
]
results = {}
@@ -282,21 +368,30 @@ def run_single_test(
"n": [],
"lifetime_ms": [],
"total_ms": [],
+ "fact_gen_ms": [],
+ "loan_prop_ms": [],
+ "expired_loans_ms": [],
+ "live_origins_ms": [],
}
for n in config["n_values"]:
- lifetime_ms, total_ms = run_single_test(
+ durations_ms = run_single_test(
args.clang_binary,
args.output_dir,
test_name,
config["generator_func"],
n,
)
- if total_ms > 0:
+ if durations_ms:
results[test_name]["n"].append(n)
- results[test_name]["lifetime_ms"].append(lifetime_ms)
- results[test_name]["total_ms"].append(total_ms)
+ for key, value in durations_ms.items():
+ results[test_name][key].append(value)
+
print(
- f" Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}"
+ f" Total Analysis: {human_readable_time(durations_ms['lifetime_ms'])} | "
+ f"FactGen: {human_readable_time(durations_ms['fact_gen_ms'])} | "
+ f"LoanProp: {human_readable_time(durations_ms['loan_prop_ms'])} | "
+ f"ExpiredLoans: {human_readable_time(durations_ms['expired_loans_ms'])} | "
+ f"LiveOrigins: {human_readable_time(durations_ms['live_origins_ms'])}"
)
print("\n\n" + "=" * 80)
@@ -305,3 +400,8 @@ def run_single_test(
markdown_report = generate_markdown_report(results)
print(markdown_report)
+
+ report_filename = os.path.join(args.output_dir, "performance_report.md")
+ with open(report_filename, "w") as f:
+ f.write(markdown_report)
+ print(f"Report saved to: {report_filename}")