https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/147315
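
Once applied, the benchmark can be driven either through the new CMake target or by running the script directly. A rough sketch of both invocations, assuming an existing Ninja build tree in ./build with clang already built (paths here are illustrative; the script needs numpy and scipy, see requirements.txt below):

  cmake --build build --target benchmark_lifetime_safety_analysis

  # or, bypassing the CMake-managed virtualenv:
  python3 clang/test/Analysis/LifetimeSafety/benchmark.py \
      --clang-binary build/bin/clang \
      --output-dir build/lifetime-benchmark-results
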
>From 014d81d9da31df3cf46bd8fc5f7cb470b3271b8e Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <u...@google.com>
Date: Mon, 7 Jul 2025 15:13:00 +0000
Subject: [PATCH] [LifetimeSafety] Add script performance benchmarking

---
 clang/lib/Analysis/LifetimeSafety.cpp         |   7 +-
 .../Analysis/LifetimeSafety/CMakeLists.txt    |  49 +++
 .../test/Analysis/LifetimeSafety/benchmark.py | 308 ++++++++++++++++++
 .../Analysis/LifetimeSafety/requirements.txt  |   2 +
 clang/test/CMakeLists.txt                     |   2 +
 5 files changed, 367 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Analysis/LifetimeSafety/CMakeLists.txt
 create mode 100644 clang/test/Analysis/LifetimeSafety/benchmark.py
 create mode 100644 clang/test/Analysis/LifetimeSafety/requirements.txt

diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index e881e592ef59f..1c83b5051bad1 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -151,7 +151,12 @@ class OriginManager {
 
   OriginID get(const ValueDecl &D) {
     auto It = DeclToOriginID.find(&D);
-    assert(It != DeclToOriginID.end());
+    // TODO: This should be an assert(It != DeclToOriginID.end()). The current
+    // implementation falls back to getOrCreate to avoid crashing on
+    // yet-unhandled pointer expressions, creating an empty origin for them.
+    if (It == DeclToOriginID.end())
+      return getOrCreate(D);
+
     return It->second;
   }
 
diff --git a/clang/test/Analysis/LifetimeSafety/CMakeLists.txt b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt
new file mode 100644
index 0000000000000..ce37a29655668
--- /dev/null
+++ b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt
@@ -0,0 +1,49 @@
+# =================================================================================
+# Lifetime Analysis Benchmarking Target
+# =================================================================================
+# This target allows running performance benchmarks for the clang lifetime analysis
+# using a Python script (with managed dependencies).
+
+find_package(Python3 COMPONENTS Interpreter REQUIRED)
+
+# Define paths for the virtual environment and requirements file.
+set(LIFETIME_BENCHMARK_SCRIPT
+    "${CMAKE_CURRENT_SOURCE_DIR}/benchmark.py")
+set(LIFETIME_BENCHMARK_VENV_DIR "${CMAKE_CURRENT_BINARY_DIR}/benchmark-venv")
+set(LIFETIME_BENCHMARK_REQUIREMENTS
+    "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt")
+set(LIFETIME_BENCHMARK_OUTPUT_DIR
+    "${CMAKE_CURRENT_BINARY_DIR}/benchmark_results")
+
+
+if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREMENTS})
+
+  # Set up the virtual environment and install packages
+  add_custom_command(
+    OUTPUT ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg
+    COMMAND ${Python3_EXECUTABLE} -m venv ${LIFETIME_BENCHMARK_VENV_DIR}
+    COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS}
+    DEPENDS ${LIFETIME_BENCHMARK_REQUIREMENTS}
+    COMMENT "Creating Python virtual environment and installing dependencies for benchmark..."
+  )
+  add_custom_target(benchmark_venv_setup
+    DEPENDS ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg
+  )
+
+  # Main benchmark target
+  add_custom_target(benchmark_lifetime_safety_analysis
+    COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python ${LIFETIME_BENCHMARK_SCRIPT}
+            --clang-binary ${LLVM_BINARY_DIR}/bin/clang
+            --output-dir ${LIFETIME_BENCHMARK_OUTPUT_DIR}
+
+    DEPENDS clang benchmark_venv_setup
+
+    # Display the output directly in the console.
+    USES_TERMINAL
+
+    COMMENT "Running Lifetime Analysis performance benchmarks..."
+  )
+
+  set_target_properties(benchmark_lifetime_safety_analysis
+    PROPERTIES FOLDER "Clang/Benchmarks")
+endif()
diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py
new file mode 100644
index 0000000000000..10ffa6d7dc2be
--- /dev/null
+++ b/clang/test/Analysis/LifetimeSafety/benchmark.py
@@ -0,0 +1,308 @@
+import sys
+import argparse
+import subprocess
+import tempfile
+import json
+import os
+from datetime import datetime
+import numpy as np
+from scipy.optimize import curve_fit
+from scipy.stats import t
+
+
+def generate_cpp_cycle_test(n: int) -> str:
+    """
+    Generates a C++ code snippet with a specified number of pointers in a cycle.
+    Creates a while loop that rotates N pointers.
+    This pattern tests the convergence speed of the dataflow analysis when
+    reaching its fixed point.
+
+    Example:
+        struct MyObj { int id; ~MyObj() {} };
+
+        void long_cycle_4(bool condition) {
+          MyObj v1{1};
+          MyObj v2{1};
+          MyObj v3{1};
+          MyObj v4{1};
+
+          MyObj* p1 = &v1;
+          MyObj* p2 = &v2;
+          MyObj* p3 = &v3;
+          MyObj* p4 = &v4;
+
+          while (condition) {
+            MyObj* temp = p1;
+            p1 = p2;
+            p2 = p3;
+            p3 = p4;
+            p4 = temp;
+          }
+        }
+    """
+    if n <= 0:
+        return "// Number of variables must be positive."
+
+    cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+    cpp_code += f"void long_cycle_{n}(bool condition) {{\n"
+    for i in range(1, n + 1):
+        cpp_code += f"  MyObj v{i}{{1}};\n"
+    cpp_code += "\n"
+    for i in range(1, n + 1):
+        cpp_code += f"  MyObj* p{i} = &v{i};\n"
+
+    cpp_code += "\n  while (condition) {\n"
+    if n > 0:
+        cpp_code += f"    MyObj* temp = p1;\n"
+        for i in range(1, n):
+            cpp_code += f"    p{i} = p{i+1};\n"
+        cpp_code += f"    p{n} = temp;\n"
+    cpp_code += "  }\n}\n"
+    cpp_code += f"\nint main() {{ long_cycle_{n}(false); return 0; }}\n"
+    return cpp_code
+
+
+def generate_cpp_merge_test(n: int) -> str:
+    """
+    Creates N independent if statements that merge at a single point.
+    This pattern specifically stresses the performance of the
+    'LifetimeLattice::join' operation.
+
+    Example:
+        struct MyObj { int id; ~MyObj() {} };
+
+        void conditional_merges_4(bool condition) {
+          MyObj v1, v2, v3, v4;
+          MyObj *p1 = nullptr, *p2 = nullptr, *p3 = nullptr, *p4 = nullptr;
+
+          if(condition) { p1 = &v1; }
+          if(condition) { p2 = &v2; }
+          if(condition) { p3 = &v3; }
+          if(condition) { p4 = &v4; }
+        }
+    """
+    if n <= 0:
+        return "// Number of variables must be positive."
+
+    cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+    cpp_code += f"void conditional_merges_{n}(bool condition) {{\n"
+    decls = [f"v{i}" for i in range(1, n + 1)]
+    cpp_code += f"  MyObj {', '.join(decls)};\n"
+    ptr_decls = [f"*p{i} = nullptr" for i in range(1, n + 1)]
+    cpp_code += f"  MyObj {', '.join(ptr_decls)};\n\n"
+
+    for i in range(1, n + 1):
+        cpp_code += f"  if(condition) {{ p{i} = &v{i}; }}\n"
+
+    cpp_code += "}\n"
+    cpp_code += f"\nint main() {{ conditional_merges_{n}(false); return 0; }}\n"
+    return cpp_code
+
+
+def analyze_trace_file(trace_path: str) -> tuple[float, float]:
+    """
+    Parses the -ftime-trace JSON output to find durations.
+
+    Returns:
+        A tuple of (lifetime_analysis_duration_us, total_clang_duration_us).
+ """ + lifetime_duration = 0.0 + total_duration = 0.0 + try: + with open(trace_path, "r") as f: + trace_data = json.load(f) + for event in trace_data.get("traceEvents", []): + if event.get("name") == "LifetimeSafetyAnalysis": + lifetime_duration += float(event.get("dur", 0)) + if event.get("name") == "ExecuteCompiler": + total_duration += float(event.get("dur", 0)) + + except (IOError, json.JSONDecodeError) as e: + print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr) + return 0.0, 0.0 + return lifetime_duration, total_duration + + +def power_law(n, c, k): + """Represents the power law function: y = c * n^k""" + return c * np.power(n, k) + + +def human_readable_time(ms: float) -> str: + """Converts milliseconds to a human-readable string (ms or s).""" + if ms >= 1000: + return f"{ms / 1000:.2f} s" + return f"{ms:.2f} ms" + + +def generate_markdown_report(results: dict) -> str: + """Generates a Markdown-formatted report from the benchmark results.""" + report = [] + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z") + report.append(f"# Lifetime Analysis Performance Report") + report.append(f"> Generated on: {timestamp}") + report.append("\n---\n") + + for test_name, data in results.items(): + title = data["title"] + report.append(f"## Test Case: {title}") + report.append("") + + # Table header + report.append("| N | Analysis Time | Total Clang Time |") + report.append("|:----|--------------:|-----------------:|") + + # Table rows + n_data = np.array(data["n"]) + analysis_data = np.array(data["lifetime_ms"]) + total_data = np.array(data["total_ms"]) + for i in range(len(n_data)): + analysis_str = human_readable_time(analysis_data[i]) + total_str = human_readable_time(total_data[i]) + report.append(f"| {n_data[i]:<3} | {analysis_str:>13} | {total_str:>16} |") + + report.append("") + + # Complexity analysis + report.append(f"**Complexity Analysis:**") + try: + # Curve fitting requires at least 3 points + if len(n_data) < 3: + raise ValueError("Not enough data points to perform curve fitting.") + + popt, pcov = curve_fit( + power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000 + ) + _, k = popt + + # Confidence Interval for k + alpha = 0.05 # 95% confidence + dof = max(0, len(n_data) - len(popt)) # degrees of freedom + t_val = t.ppf(1.0 - alpha / 2.0, dof) + # Standard error of the parameters + perr = np.sqrt(np.diag(pcov)) + k_stderr = perr[1] + k_ci_lower = k - t_val * k_stderr + k_ci_upper = k + t_val * k_stderr + + report.append( + f"- The performance for this case scales approx. as **O(n<sup>{k:.2f}</sup>)**." + ) + report.append( + f"- **95% Confidence interval for exponent:** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`." 
+            )
+
+        except (RuntimeError, ValueError) as e:
+            report.append(f"- Could not determine a best-fit curve for the data: {e}")
+
+        report.append("\n---\n")
+
+    return "\n".join(report)
+
+
+def run_single_test(
+    clang_binary: str, output_dir: str, test_name: str, generator_func, n: int
+) -> tuple[float, float]:
+    """Generates, compiles, and benchmarks a single test case."""
+    print(f"--- Running Test: {test_name.capitalize()} with N={n} ---")
+
+    generated_code = generator_func(n)
+
+    base_name = f"test_{test_name}_{n}"
+    source_file = os.path.join(output_dir, f"{base_name}.cpp")
+    trace_file = os.path.join(output_dir, f"{base_name}.json")
+
+    with open(source_file, "w") as f:
+        f.write(generated_code)
+
+    clang_command = [
+        clang_binary,
+        "-c",
+        "-o",
+        "/dev/null",
+        "-ftime-trace=" + trace_file,
+        "-Wexperimental-lifetime-safety",
+        "-std=c++17",
+        source_file,
+    ]
+
+    result = subprocess.run(clang_command, capture_output=True, text=True)
+
+    if result.returncode != 0:
+        print(f"Compilation failed for N={n}!", file=sys.stderr)
+        print(result.stderr, file=sys.stderr)
+        return 0.0, 0.0
+
+    lifetime_us, total_us = analyze_trace_file(trace_file)
+
+    return lifetime_us / 1000.0, total_us / 1000.0
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis."
+    )
+    parser.add_argument(
+        "--clang-binary", type=str, required=True, help="Path to the Clang executable."
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        default="benchmark_results",
+        help="Directory to save persistent benchmark files. (Default: ./benchmark_results)",
+    )
+
+    args = parser.parse_args()
+
+    os.makedirs(args.output_dir, exist_ok=True)
+    print(f"Benchmark files will be saved in: {os.path.abspath(args.output_dir)}\n")
+
+
+    test_configurations = [
+        {
+            "name": "cycle",
+            "title": "Pointer Cycle in Loop",
+            "generator_func": generate_cpp_cycle_test,
+            "n_values": [10, 25, 50, 75, 100, 150],
+        },
+        {
+            "name": "merge",
+            "title": "CFG Merges",
+            "generator_func": generate_cpp_merge_test,
+            "n_values": [10, 50, 100, 200, 400, 800],
+        },
+    ]
+
+    results = {}
+
+    print("Running performance benchmarks...")
+    for config in test_configurations:
+        test_name = config["name"]
+        results[test_name] = {
+            "title": config["title"],
+            "n": [],
+            "lifetime_ms": [],
+            "total_ms": [],
+        }
+        for n in config["n_values"]:
+            lifetime_ms, total_ms = run_single_test(
+                args.clang_binary,
+                args.output_dir,
+                test_name,
+                config["generator_func"],
+                n,
+            )
+            if total_ms > 0:
+                results[test_name]["n"].append(n)
+                results[test_name]["lifetime_ms"].append(lifetime_ms)
+                results[test_name]["total_ms"].append(total_ms)
+                print(
+                    f"  Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}"
+                )
+
+    print("\n\n" + "=" * 80)
+    print("Generating Markdown Report...")
+    print("=" * 80 + "\n")
+
+    markdown_report = generate_markdown_report(results)
+    print(markdown_report)
diff --git a/clang/test/Analysis/LifetimeSafety/requirements.txt b/clang/test/Analysis/LifetimeSafety/requirements.txt
new file mode 100644
index 0000000000000..6bad10388ecb1
--- /dev/null
+++ b/clang/test/Analysis/LifetimeSafety/requirements.txt
@@ -0,0 +1,2 @@
+numpy
+scipy
diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 416af9ab4d0aa..286c9d40d2dab 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -234,3 +234,5 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/debuginfo-tests)
     add_subdirectory(debuginfo-tests)
   endif()
 endif()
+
+add_subdirectory(Analysis/LifetimeSafety)

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits