Files
composable_kernel/script/analyze_build/examples/analyze_file.py
John Shumway 0caf06e6f1 Add build trace analysis tools for -ftime-trace data
Introduces a new Python toolset in script/analyze_build/ for analyzing
Clang -ftime-trace JSON output to identify compilation bottlenecks and
optimize C++ metaprogramming build times.

Key features:
- Fast parallel processing of trace JSON files using all CPU cores (> 100 files/sec)
- Simple, cache-free architecture for consistent performance
- Comprehensive analysis of template instantiations and event types
- Command-line tools and Jupyter notebook support
- Automatic orjson detection for JSON parsing speedup

Components:
- trace_analysis/: Core library (models, parser, transformer)
- examples/: CLI tools for single-file and directory analysis
- notebooks/: Comprehensive Jupyter notebook with analysis patterns
- Detailed README with usage examples and performance data

Also adds ruff configuration to pyproject.toml to ignore E402 (module
level import not at top of file) for Jupyter notebooks, which commonly
have imports after markdown cells.

This toolset addresses the critical problem of long build times in CK's
C++17 metaprogramming codebase by treating -ftime-trace as a big data
problem, using pandas and modern analysis tools to understand compilation
patterns and measure improvement opportunities.
2026-01-03 18:28:22 -05:00

145 lines
4.6 KiB
Python

#!/usr/bin/env python3
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
"""
Analyze a single -ftime-trace JSON file.
Simple script to analyze one trace file and display summary statistics.
Usage:
python analyze_file.py <path_to_trace_file.json>
Example:
python analyze_file.py ../../build-trace/some_file.json
"""
import sys
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from trace_analysis import TraceFile, TraceParser, TraceTransformer
def _truncate(text: str, limit: int) -> str:
    """Return ``text`` cut down to at most ``limit`` characters.

    Text longer than ``limit`` keeps its first ``limit - 3`` characters and
    gets a trailing "..." marker; anything shorter is returned unchanged.
    """
    if len(text) <= limit:
        return text
    return text[: limit - 3] + "..."


def analyze_trace_file(trace_path: Path) -> None:
    """Analyze a single trace file and print summary statistics.

    The file is loaded with ``TraceFile.from_path``, parsed with
    ``TraceParser.parse`` and converted into an events DataFrame and a
    templates DataFrame by ``TraceTransformer``. The report, written to
    stdout, contains:

    * event count, summed ``dur`` and file size;
    * the 10 event names with the largest summed ``dur``;
    * when the templates DataFrame is non-empty: a template summary, the 10
      largest individual ``dur`` values, the 10 most frequent
      ``template_detail`` values and the 10 ``template_detail`` values with
      the largest summed ``dur``.

    ``dur`` values are treated as microseconds (divided by 1e6 for seconds
    and by 1e3 for milliseconds).

    Args:
        trace_path: Path to the trace JSON file to analyze.
    """
    print(f"\n{'=' * 80}")
    print(f"Analyzing: {trace_path.name}")
    print(f"{'=' * 80}\n")

    # Parse the file
    trace_file = TraceFile.from_path(trace_path)
    events = TraceParser.parse(trace_file)
    events_df = TraceTransformer.to_events_dataframe(events)
    templates_df = TraceTransformer.to_templates_dataframe(events)

    # Basic statistics
    # NOTE(review): this sums `dur` over every event; if events nest inside
    # one another the total can exceed wall-clock time -- confirm intended.
    total_duration_us = events_df["dur"].sum()
    print(f"Total events: {len(events_df):,}")
    print(f"Total duration: {total_duration_us / 1e6:.2f}s")
    print(f"File size: {trace_file.size_bytes / 1024:.1f} KB\n")

    # Top event types by duration
    print("Top 10 Event Types by Total Duration:")
    print(f"{'Event Type':<40} {'Count':>10} {'Total (s)':>12} {'Avg (ms)':>12}")
    print("-" * 80)
    event_stats = events_df.groupby("name", observed=True)["dur"].agg(
        ["sum", "count", "mean"]
    )
    event_stats = event_stats.sort_values("sum", ascending=False).head(10)
    for name, row in event_stats.iterrows():
        total_sec = row["sum"] / 1e6
        avg_ms = row["mean"] / 1e3
        # iterrows() upcasts the row to a common dtype, so the count comes
        # back as a float and must be converted for the "," integer format.
        print(f"{name:<40} {int(row['count']):>10,} {total_sec:>12.2f} {avg_ms:>12.2f}")

    # Template analysis
    if len(templates_df) > 0:
        total_template_time = templates_df["dur"].sum()
        print("\n\nTemplate Instantiation Summary:")
        print("-" * 80)
        print(f"Total template instantiations: {len(templates_df):,}")
        print(f"Total template time: {total_template_time / 1e6:.2f}s")
        print(
            f"Template time percentage: {(total_template_time / total_duration_us) * 100:.1f}%"
        )

        # Top 10 slowest individual template instantiations
        print("\n\nTop 10 Slowest Template Instantiations:")
        print(f"{'Duration (s)':>12} {'Template'}")
        print("-" * 80)
        slowest = templates_df.nlargest(10, "dur")
        for _, row in slowest.iterrows():
            duration_sec = row["dur"] / 1e6
            template = _truncate(row["template_detail"], 65)
            print(f"{duration_sec:>12.2f} {template}")

        # Most frequently instantiated templates
        print("\n\nTop 10 Most Frequently Instantiated Templates:")
        print(f"{'Count':>10} {'Template'}")
        print("-" * 80)
        template_counts = templates_df["template_detail"].value_counts().head(10)
        for template, count in template_counts.items():
            print(f"{count:>10,} {_truncate(template, 65)}")

        # Most expensive templates by total time
        print("\n\nTop 10 Most Expensive Templates by Total Duration:")
        print(f"{'Total (s)':>12} {'Count':>10} {'Avg (ms)':>12} {'Template'}")
        print("-" * 80)
        # observed=True matches the event-type groupby above: for a
        # categorical column it keeps unobserved categories out of the
        # result, and it has no effect on non-categorical columns.
        template_totals = templates_df.groupby("template_detail", observed=True)[
            "dur"
        ].agg(["sum", "count", "mean"])
        template_totals = template_totals.sort_values("sum", ascending=False).head(10)
        for template, row in template_totals.iterrows():
            total_sec = row["sum"] / 1e6
            avg_ms = row["mean"] / 1e3
            # This table has three numeric columns, so the name gets less room.
            display = _truncate(template, 40)
            print(
                f"{total_sec:>12.2f} {int(row['count']):>10,} {avg_ms:>12.2f} {display}"
            )

    print(f"\n{'=' * 80}\n")
def main():
    """Command-line entry point: validate the argument and analyze one file.

    Expects exactly one command-line argument, the path to a trace file.
    Usage text, errors and warnings are written to stderr so that stdout
    carries only the analysis report.

    Exits with status 1 when the argument count is wrong, when the path does
    not exist or is not a regular file, or when the analysis raises. A path
    without a ``.json`` suffix only produces a warning.
    """
    if len(sys.argv) != 2:
        print(__doc__, file=sys.stderr)
        sys.exit(1)

    trace_path = Path(sys.argv[1])
    if not trace_path.exists():
        print(f"Error: File not found: {trace_path}", file=sys.stderr)
        sys.exit(1)
    # exists() is also true for directories; reject them here with a clear
    # message instead of failing later inside the parser.
    if not trace_path.is_file():
        print(f"Error: Not a regular file: {trace_path}", file=sys.stderr)
        sys.exit(1)
    if trace_path.suffix != ".json":
        print(
            f"Warning: File does not have .json extension: {trace_path}",
            file=sys.stderr,
        )

    try:
        analyze_trace_file(trace_path)
    except Exception as e:
        # Top-level boundary: report the failure with its traceback and turn
        # it into a non-zero exit status.
        print(f"Error analyzing file: {e}", file=sys.stderr)
        import traceback

        traceback.print_exc()
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()