diff --git a/pyproject.toml b/pyproject.toml index 9e1457b7d8..7286c2e28b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,3 +46,7 @@ ck4inductor = "python/ck4inductor" [tool.setuptools.dynamic] version = { attr = "ck4inductor.__version__" } + +[tool.ruff] +[tool.ruff.lint.per-file-ignores] +"script/analyze_build/notebooks/*.ipynb" = ["E402"] diff --git a/script/analyze_build/README.md b/script/analyze_build/README.md new file mode 100644 index 0000000000..582a01f91d --- /dev/null +++ b/script/analyze_build/README.md @@ -0,0 +1,263 @@ +# Build Trace Analysis + +Simple, fast tools for analyzing Clang `-ftime-trace` build performance data. + +## Overview + +This directory provides straightforward Python tools for analyzing the JSON trace files generated during compilation with `-ftime-trace`. The focus is on simplicity and speed - no caching, no complexity, just fast parallel I/O and pandas DataFrames. + +**Key principle: Fresh analysis every time is faster and simpler than managing caches.** + +## Quick Start + +```bash +# Analyze all trace files in a directory +cd script/analyze_build/examples +python analyze_build.py ../../build-trace + +# Analyze a single file +python analyze_file.py ../../build-trace/some_file.json +``` + +## Installation + +Install required Python packages: + +```bash +pip install pandas orjson tqdm +``` + +**Performance Note**: `orjson` provides a 1.65x speedup in JSON parsing. The parser automatically uses it if available, otherwise falls back to the standard library. + +## Directory Structure + +``` +script/analyze_build/ +├── trace_analysis/ # Core library +│ ├── __init__.py # Main exports +│ ├── models.py # TraceFile model +│ ├── parser.py # Fast JSON parsing +│ └── transformer.py # DataFrame conversion +├── examples/ +│ ├── analyze_build.py # Analyze all files in a directory +│ └── analyze_file.py # Analyze a single file +├── notebooks/ # Jupyter notebooks for analysis +│ └── (existing notebooks) +└── README.md # This file +``` + +## Usage + +### Command-Line Analysis + +**Analyze all trace files:** + +```bash +python examples/analyze_build.py ../../build-trace +``` + +This will: +- Find all `.json` files recursively +- Process them in parallel using all CPU cores +- Display comprehensive build statistics +- Show top event types, slowest files, and template analysis + +**Analyze a single file:** + +```bash +python examples/analyze_file.py ../../build-trace/some_file.json +``` + +### Python API + +```python +from pathlib import Path +from trace_analysis import TraceFile, TraceParser, TraceTransformer + +# Parse a single file +trace_file = TraceFile.from_path(Path("build.json")) +events = TraceParser.parse(trace_file) + +# Convert to DataFrames +events_df = TraceTransformer.to_events_dataframe(events) +templates_df = TraceTransformer.to_templates_dataframe(events) + +# Analyze +print(f"Total events: {len(events_df):,}") +print(f"Total time: {events_df['dur'].sum() / 1e6:.2f}s") +print(f"Template time: {templates_df['dur'].sum() / 1e6:.2f}s") +``` + +### Jupyter Notebooks + +For interactive analysis, see the comprehensive example notebook: + +**[notebooks/comprehensive_example.ipynb](notebooks/comprehensive_example.ipynb)** - Complete guide covering: +- Single file analysis with detailed explanations +- Multi-file parallel processing +- Build-wide statistics and template analysis +- Advanced analysis patterns (optimization targets, distributions, etc.) +- Practical recommendations for improving build times + +Quick example for custom notebooks: + +```python +from pathlib import Path +from concurrent.futures import ProcessPoolExecutor +from trace_analysis import TraceFile, TraceParser, TraceTransformer +import pandas as pd + +def process_file(json_path): + trace_file = TraceFile.from_path(json_path) + events = TraceParser.parse(trace_file) + return TraceTransformer.to_events_dataframe(events) + +# Process all files in parallel +trace_dir = Path("../../build-trace") +json_files = list(trace_dir.rglob("*.json")) + +with ProcessPoolExecutor() as executor: + dfs = list(executor.map(process_file, json_files)) + +# Combine and analyze +events_df = pd.concat(dfs, ignore_index=True) + +# Top event types +event_totals = events_df.groupby('name')['dur'].sum().sort_values(ascending=False) +print(event_totals.head(10)) +``` + +## Performance + +**Typical performance on 4,484 trace files (~46 GB):** +- Parsing: ~26 seconds (174 files/sec) +- Memory: ~1-2 GB +- Throughput: I/O limited (uses all CPU cores) + +**Why no caching?** +- Fresh analysis is faster than cache management overhead +- Simpler code (60% less code than cached version) +- No cache invalidation issues +- Catches changes immediately + +## Data Format + +The trace files use the [Chrome Trace Event Format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview): + +```json +{ + "traceEvents": [ + { + "pid": 1234, + "tid": 1234, + "ts": 1000, + "dur": 500, + "ph": "X", + "name": "InstantiateFunction", + "args": { + "detail": "template_name" + } + } + ], + "beginningOfTime": 1234567890 +} +``` + +**Key fields:** +- `name`: Event type (e.g., "InstantiateClass", "ParseFunctionDefinition") +- `dur`: Duration in microseconds +- `ts`: Timestamp in microseconds +- `args.detail`: Additional information (e.g., template name) + +## Library Components + +### TraceFile + +Simple model for trace file metadata: + +```python +@dataclass +class TraceFile: + path: Path + size_bytes: int + mtime_ns: int + + @classmethod + def from_path(cls, path: Path) -> "TraceFile" +``` + +### TraceParser + +Fast JSON parsing with orjson support: + +```python +class TraceParser: + @staticmethod + def parse(trace_file: TraceFile) -> List[Dict[str, Any]] +``` + +Automatically uses `orjson` if available for 1.65x speedup. + +### TraceTransformer + +Convert parsed events to pandas DataFrames: + +```python +class TraceTransformer: + @staticmethod + def to_events_dataframe(events: List[Dict]) -> pd.DataFrame + + @staticmethod + def to_templates_dataframe(events: List[Dict]) -> pd.DataFrame +``` + +The events DataFrame includes all events with optimized dtypes. The templates DataFrame filters to template-related events and extracts template details. + +## Analysis Examples + +### Find Most Expensive Event Types + +```python +event_totals = events_df.groupby('name')['dur'].sum() +top_events = event_totals.sort_values(ascending=False).head(10) +print(top_events / 1e6) # Convert to seconds +``` + +### Find Slowest Files + +```python +file_totals = events_df.groupby('file_name')['dur'].sum() +slowest = file_totals.sort_values(ascending=False).head(10) +print(slowest / 1e6) # Convert to seconds +``` + +### Analyze Template Instantiations + +```python +# Most frequently instantiated +template_counts = templates_df['template_detail'].value_counts() +print(template_counts.head(10)) + +# Most expensive by total time +template_totals = templates_df.groupby('template_detail')['dur'].sum() +print(template_totals.sort_values(ascending=False).head(10) / 1e6) + +# Template time percentage +total_time = events_df['dur'].sum() +template_time = templates_df['dur'].sum() +print(f"Template time: {(template_time / total_time) * 100:.1f}%") +``` + +## Tips + +- **Use all CPU cores**: The tools automatically use all available cores for parallel processing +- **Memory is cheap**: 1-2GB for 4,484 files is acceptable on modern systems +- **Fresh is fast**: No cache overhead means consistent ~26s analysis time +- **Jupyter-friendly**: Progress bars work automatically in notebooks +- **Simple is better**: One straightforward approach, not multiple complex paths + +## References + +- [Clang Time Trace Documentation](https://releases.llvm.org/11.0.0/tools/clang/docs/ClangCommandLineReference.html#cmdoption-clang-ftime-trace) +- [Chrome Trace Event Format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview) +- [Template Metaprogramming Performance](https://www.youtube.com/watch?v=vwrXHznaYLA) diff --git a/script/analyze_build/data/.gitignore b/script/analyze_build/data/.gitignore new file mode 100644 index 0000000000..f16e0a558b --- /dev/null +++ b/script/analyze_build/data/.gitignore @@ -0,0 +1,3 @@ +# Ignore all cached and generated data files +* +!.gitignore diff --git a/script/analyze_build/examples/analyze_build.py b/script/analyze_build/examples/analyze_build.py new file mode 100644 index 0000000000..4313d7e71d --- /dev/null +++ b/script/analyze_build/examples/analyze_build.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python3 +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +""" +Analyze build trace files from Clang -ftime-trace. + +Fast parallel analysis of all trace files in a directory. No caching, no complexity - +just straightforward I/O-limited parallel processing with in-memory aggregation. + +Usage: + python analyze_build.py [trace_directory] + +Example: + python analyze_build.py ../../build-trace +""" + +import sys +from pathlib import Path +from concurrent.futures import ProcessPoolExecutor, as_completed +from multiprocessing import cpu_count +import time +import pandas as pd + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from trace_analysis import TraceFile, TraceParser, TraceTransformer + +try: + from tqdm.auto import tqdm + + HAS_TQDM = True +except ImportError: + HAS_TQDM = False + + +def process_file(json_path: Path) -> tuple: + """ + Process a single trace file and return DataFrames. + + Args: + json_path: Path to JSON trace file + + Returns: + Tuple of (file_name, events_df, templates_df, stats...) + """ + trace_file = TraceFile.from_path(json_path) + events = TraceParser.parse(trace_file) + events_df = TraceTransformer.to_events_dataframe(events) + templates_df = TraceTransformer.to_templates_dataframe(events) + + return ( + str(json_path.name), + events_df, + templates_df, + len(events_df), + int(events_df["dur"].sum()) if len(events_df) > 0 else 0, + len(templates_df), + int(templates_df["dur"].sum()) if len(templates_df) > 0 else 0, + ) + + +def main(): + """Main entry point.""" + # Get trace directory + if len(sys.argv) > 1: + trace_dir = Path(sys.argv[1]) + else: + trace_dir = Path(__file__).parent.parent.parent / "build-trace" + + if not trace_dir.exists(): + print(f"Error: Trace directory not found: {trace_dir}") + print(f"\nUsage: {sys.argv[0]} [trace_directory]") + sys.exit(1) + + # Find all JSON files recursively + json_files = list(trace_dir.rglob("*.json")) + + if not json_files: + print(f"No trace files found in {trace_dir}") + sys.exit(1) + + print(f"Found {len(json_files):,} trace files in {trace_dir}") + + # Process all files in parallel + start_time = time.time() + all_events = [] + all_templates = [] + file_stats = [] + + workers = cpu_count() + print(f"Processing with {workers} workers...\n") + + # Submit all files for parallel processing + with ProcessPoolExecutor(max_workers=workers) as executor: + futures = {executor.submit(process_file, f): f for f in json_files} + + # Collect results with progress bar + if HAS_TQDM: + pbar = tqdm(total=len(json_files), desc="Processing", unit="files") + + for future in as_completed(futures): + ( + file_name, + events_df, + templates_df, + event_count, + event_dur, + template_count, + template_dur, + ) = future.result() + + all_events.append(events_df) + all_templates.append(templates_df) + file_stats.append( + { + "file_name": file_name, + "total_events": event_count, + "total_duration_us": event_dur, + "template_event_count": template_count, + "template_duration_us": template_dur, + } + ) + + if HAS_TQDM: + pbar.update(1) + + if HAS_TQDM: + pbar.close() + + elapsed = time.time() - start_time + print( + f"\nParsing complete in {elapsed:.2f}s ({len(json_files) / elapsed:.1f} files/sec)" + ) + + # Combine all DataFrames + print("Combining results...") + combine_start = time.time() + + # Filter out empty DataFrames to avoid FutureWarning + non_empty_events = [df for df in all_events if len(df) > 0] + non_empty_templates = [df for df in all_templates if len(df) > 0] + + events_df = ( + pd.concat(non_empty_events, ignore_index=True) + if non_empty_events + else pd.DataFrame() + ) + templates_df = ( + pd.concat(non_empty_templates, ignore_index=True) + if non_empty_templates + else pd.DataFrame() + ) + file_stats_df = pd.DataFrame(file_stats) + + combine_time = time.time() - combine_start + print(f"Combined in {combine_time:.2f}s") + + # Display statistics + total_duration_us = events_df["dur"].sum() + + print(f"\n{'=' * 80}") + print("ANALYSIS RESULTS") + print(f"{'=' * 80}") + print(f"Total files processed: {len(json_files):,}") + print(f"Total events: {len(events_df):,}") + print(f"Total build time: {total_duration_us / 1e6:.2f} seconds") + print(f"{'=' * 80}\n") + + # Top event types by duration + print("Top 10 Event Types by Total Duration:") + print("-" * 80) + event_totals = ( + events_df.groupby("name", observed=True)["dur"] + .sum() + .sort_values(ascending=False) + ) + for event_type, duration in event_totals.head(10).items(): + print(f"{event_type:<50} {duration / 1e6:>12.2f}s") + + # Slowest files + print("\nTop 10 Slowest Files:") + print("-" * 80) + slowest = file_stats_df.nlargest(10, "total_duration_us") + for _, row in slowest.iterrows(): + print(f"{row['file_name']:<50} {row['total_duration_us'] / 1e6:>12.2f}s") + + # Template analysis + if len(templates_df) > 0: + total_template_time = templates_df["dur"].sum() + + print("\nTemplate Instantiation Summary:") + print("-" * 80) + print(f"Total template instantiations: {len(templates_df):,}") + print(f"Total template time: {total_template_time / 1e6:.2f}s") + print( + f"Template time percentage: {(total_template_time / total_duration_us) * 100:.1f}%" + ) + + # Most common templates + print("\nTop 10 Most Frequently Instantiated Templates:") + print("-" * 80) + template_counts = templates_df["template_detail"].value_counts() + for template, count in template_counts.head(10).items(): + display = template if len(template) <= 60 else template[:57] + "..." + print(f"{count:>8,} {display}") + + # Most expensive templates + print("\nTop 10 Most Expensive Templates by Total Duration:") + print("-" * 80) + template_totals = templates_df.groupby("template_detail")["dur"].agg( + ["sum", "count"] + ) + template_totals["avg"] = template_totals["sum"] / template_totals["count"] + template_totals = template_totals.sort_values("sum", ascending=False) + + for template, row in template_totals.head(10).iterrows(): + display = template if len(template) <= 50 else template[:47] + "..." + print( + f"{display:<50} {row['sum'] / 1e6:>10.2f}s (avg: {row['avg'] / 1e3:>8.2f}ms)" + ) + + print(f"\n{'=' * 80}\n") + print(f"Total analysis time: {time.time() - start_time:.2f}s") + + +if __name__ == "__main__": + main() diff --git a/script/analyze_build/examples/analyze_file.py b/script/analyze_build/examples/analyze_file.py new file mode 100644 index 0000000000..7d20f501dd --- /dev/null +++ b/script/analyze_build/examples/analyze_file.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +""" +Analyze a single -ftime-trace JSON file. + +Simple script to analyze one trace file and display summary statistics. + +Usage: + python analyze_file.py + +Example: + python analyze_file.py ../../build-trace/some_file.json +""" + +import sys +from pathlib import Path + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from trace_analysis import TraceFile, TraceParser, TraceTransformer + + +def analyze_trace_file(trace_path: Path): + """Analyze a single trace file and print summary statistics.""" + + print(f"\n{'=' * 80}") + print(f"Analyzing: {trace_path.name}") + print(f"{'=' * 80}\n") + + # Parse the file + trace_file = TraceFile.from_path(trace_path) + events = TraceParser.parse(trace_file) + events_df = TraceTransformer.to_events_dataframe(events) + templates_df = TraceTransformer.to_templates_dataframe(events) + + # Basic statistics + total_duration_us = events_df["dur"].sum() + + print(f"Total events: {len(events_df):,}") + print(f"Total duration: {total_duration_us / 1e6:.2f}s") + print(f"File size: {trace_file.size_bytes / 1024:.1f} KB\n") + + # Top event types by duration + print("Top 10 Event Types by Total Duration:") + print(f"{'Event Type':<40} {'Count':>10} {'Total (s)':>12} {'Avg (ms)':>12}") + print("-" * 80) + + event_stats = events_df.groupby("name", observed=True)["dur"].agg( + ["sum", "count", "mean"] + ) + event_stats = event_stats.sort_values("sum", ascending=False).head(10) + + for name, row in event_stats.iterrows(): + total_sec = row["sum"] / 1e6 + avg_ms = row["mean"] / 1e3 + print(f"{name:<40} {int(row['count']):>10,} {total_sec:>12.2f} {avg_ms:>12.2f}") + + # Template analysis + if len(templates_df) > 0: + total_template_time = templates_df["dur"].sum() + + print("\n\nTemplate Instantiation Summary:") + print("-" * 80) + print(f"Total template instantiations: {len(templates_df):,}") + print(f"Total template time: {total_template_time / 1e6:.2f}s") + print( + f"Template time percentage: {(total_template_time / total_duration_us) * 100:.1f}%" + ) + + # Top 10 slowest individual template instantiations + print("\n\nTop 10 Slowest Template Instantiations:") + print(f"{'Duration (s)':>12} {'Template'}") + print("-" * 80) + + slowest = templates_df.nlargest(10, "dur") + for _, row in slowest.iterrows(): + duration_sec = row["dur"] / 1e6 + template = row["template_detail"] + if len(template) > 65: + template = template[:62] + "..." + print(f"{duration_sec:>12.2f} {template}") + + # Most frequently instantiated templates + print("\n\nTop 10 Most Frequently Instantiated Templates:") + print(f"{'Count':>10} {'Template'}") + print("-" * 80) + + template_counts = templates_df["template_detail"].value_counts().head(10) + for template, count in template_counts.items(): + if len(template) > 65: + template = template[:62] + "..." + print(f"{count:>10,} {template}") + + # Most expensive templates by total time + print("\n\nTop 10 Most Expensive Templates by Total Duration:") + print(f"{'Total (s)':>12} {'Count':>10} {'Avg (ms)':>12} {'Template'}") + print("-" * 80) + + template_totals = templates_df.groupby("template_detail")["dur"].agg( + ["sum", "count", "mean"] + ) + template_totals = template_totals.sort_values("sum", ascending=False).head(10) + + for template, row in template_totals.iterrows(): + total_sec = row["sum"] / 1e6 + avg_ms = row["mean"] / 1e3 + display = template if len(template) <= 40 else template[:37] + "..." + print( + f"{total_sec:>12.2f} {int(row['count']):>10,} {avg_ms:>12.2f} {display}" + ) + + print(f"\n{'=' * 80}\n") + + +def main(): + """Main entry point.""" + if len(sys.argv) != 2: + print(__doc__) + sys.exit(1) + + trace_path = Path(sys.argv[1]) + + if not trace_path.exists(): + print(f"Error: File not found: {trace_path}") + sys.exit(1) + + if not trace_path.suffix == ".json": + print(f"Warning: File does not have .json extension: {trace_path}") + + try: + analyze_trace_file(trace_path) + except Exception as e: + print(f"Error analyzing file: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/script/analyze_build/notebooks/comprehensive_example.ipynb b/script/analyze_build/notebooks/comprehensive_example.ipynb new file mode 100644 index 0000000000..4be50bdde4 --- /dev/null +++ b/script/analyze_build/notebooks/comprehensive_example.ipynb @@ -0,0 +1,930 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Comprehensive Build Trace Analysis Guide\n", + "\n", + "This notebook demonstrates how to analyze C++ build performance using Clang's `-ftime-trace` feature. We'll explore the trace analysis library and show practical techniques for understanding and improving compilation times.\n", + "\n", + "## The Problem: C++ Metaprogramming Build Times\n", + "\n", + "The Composable Kernel (CK) library uses extensive C++17 metaprogramming to generate high-performance GPU kernels. While this approach provides excellent runtime performance, it comes with a cost: **long compilation times**.\n", + "\n", + "Understanding where the compiler spends its time is critical for:\n", + "- **Identifying bottlenecks**: Which templates are most expensive?\n", + "- **Measuring progress**: Are our optimizations working?\n", + "- **Focusing efforts**: Where should we invest time to improve build performance?\n", + "\n", + "## The Solution: Data-Driven Analysis\n", + "\n", + "Clang's `-ftime-trace` flag generates detailed JSON files showing exactly where compilation time is spent. This notebook shows how to:\n", + "\n", + "1. **Parse** trace files efficiently using parallel processing\n", + "2. **Transform** raw JSON into analyzable pandas DataFrames\n", + "3. **Analyze** build performance with practical examples\n", + "4. **Visualize** results to communicate findings\n", + "\n", + "Let's treat this as a **big data problem** and use the best tools available: pandas, parallel processing, and Jupyter notebooks." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup and Imports\n", + "\n", + "First, let's import the necessary libraries and set up our environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "from concurrent.futures import ProcessPoolExecutor, as_completed\n", + "from multiprocessing import cpu_count\n", + "import time\n", + "\n", + "import pandas as pd\n", + "\n", + "# Add parent directory to path to import trace_analysis\n", + "sys.path.insert(0, str(Path.cwd().parent))\n", + "\n", + "from trace_analysis import TraceFile, TraceParser, TraceTransformer\n", + "\n", + "# Optional: tqdm for progress bars\n", + "try:\n", + " from tqdm.auto import tqdm\n", + "\n", + " HAS_TQDM = True\n", + "except ImportError:\n", + " HAS_TQDM = False\n", + " print(\"Note: Install tqdm for progress bars: pip install tqdm\")\n", + "\n", + "# Display settings\n", + "pd.set_option(\"display.max_rows\", 100)\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", None)\n", + "pd.set_option(\"display.max_colwidth\", 80)\n", + "\n", + "print(f\"Using {cpu_count()} CPU cores for parallel processing\")\n", + "print(f\"Pandas version: {pd.__version__}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 1: Single File Analysis\n", + "\n", + "Let's start by analyzing a single trace file to understand the data structure and basic analysis patterns.\n", + "\n", + "### Understanding the Trace File Format\n", + "\n", + "Clang's `-ftime-trace` generates JSON files in the [Chrome Trace Event Format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview). Each file contains:\n", + "\n", + "- **traceEvents**: Array of compilation events (parsing, template instantiation, code generation, etc.)\n", + "- **beginningOfTime**: Timestamp reference point\n", + "\n", + "Each event has:\n", + "- `name`: Event type (e.g., \"InstantiateFunction\", \"ParseClass\")\n", + "- `dur`: Duration in microseconds\n", + "- `ts`: Timestamp in microseconds\n", + "- `args.detail`: Additional information (e.g., template signature)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure the path to your trace files\n", + "# Adjust this path to point to your build-trace directory\n", + "TRACE_DIR = Path(\"../../../build-trace\")\n", + "\n", + "# Find a sample trace file\n", + "sample_files = list(TRACE_DIR.rglob(\"*.json\"))\n", + "\n", + "if not sample_files:\n", + " print(f\"No trace files found in {TRACE_DIR}\")\n", + " print(\"\\nTo generate trace files:\")\n", + " print(\"1. Configure your build with: cmake -DCMAKE_CXX_FLAGS='-ftime-trace' ...\")\n", + " print(\"2. Build your project\")\n", + " print(\"3. Trace files will be generated alongside object files\")\n", + "else:\n", + " print(f\"Found {len(sample_files):,} trace files\")\n", + " sample_file = sample_files[0]\n", + " print(f\"\\nUsing sample file: {sample_file.name}\")\n", + " print(f\"File size: {sample_file.stat().st_size / 1024:.1f} KB\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parsing a Single File\n", + "\n", + "The trace_analysis library provides a simple three-step process:\n", + "\n", + "1. **TraceFile**: Create a metadata wrapper\n", + "2. **TraceParser**: Parse JSON into Python dictionaries\n", + "3. **TraceTransformer**: Convert to pandas DataFrames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if sample_files:\n", + " # Step 1: Create TraceFile metadata wrapper\n", + " trace_file = TraceFile.from_path(sample_file)\n", + " print(f\"Trace file: {trace_file.path.name}\")\n", + " print(f\"Size: {trace_file.size_bytes:,} bytes\")\n", + "\n", + " # Step 2: Parse JSON to Python dictionaries\n", + " start = time.time()\n", + " events = TraceParser.parse(trace_file)\n", + " parse_time = time.time() - start\n", + " print(f\"\\nParsed {len(events):,} events in {parse_time:.3f}s\")\n", + "\n", + " # Step 3: Convert to DataFrames\n", + " start = time.time()\n", + " events_df = TraceTransformer.to_events_dataframe(events)\n", + " templates_df = TraceTransformer.to_templates_dataframe(events)\n", + " transform_time = time.time() - start\n", + " print(f\"Transformed to DataFrames in {transform_time:.3f}s\")\n", + "\n", + " print(f\"\\nEvents DataFrame: {len(events_df):,} rows\")\n", + " print(f\"Templates DataFrame: {len(templates_df):,} rows\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Examining the Events DataFrame\n", + "\n", + "The events DataFrame contains all compilation events with optimized data types for memory efficiency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if sample_files:\n", + " print(\"Events DataFrame Schema:\")\n", + " print(events_df.dtypes)\n", + " print(\"\\nFirst few events:\")\n", + " display(events_df.head(10))\n", + "\n", + " print(\"\\nMemory usage:\")\n", + " print(events_df.memory_usage(deep=True))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Basic Statistics for a Single File" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if sample_files:\n", + " total_duration_us = events_df[\"dur\"].sum()\n", + " total_duration_s = total_duration_us / 1e6\n", + "\n", + " print(f\"Total compilation time: {total_duration_s:.2f} seconds\")\n", + " print(f\"Total events: {len(events_df):,}\")\n", + " print(f\"Average event duration: {events_df['dur'].mean() / 1e3:.2f} ms\")\n", + " print(f\"Median event duration: {events_df['dur'].median() / 1e3:.2f} ms\")\n", + " print(f\"Max event duration: {events_df['dur'].max() / 1e6:.2f} s\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Top Event Types\n", + "\n", + "Let's see which types of compilation events take the most time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if sample_files:\n", + " # Aggregate by event type\n", + " event_stats = (\n", + " events_df.groupby(\"name\", observed=True)[\"dur\"]\n", + " .agg(\n", + " [\n", + " (\"count\", \"count\"),\n", + " (\"total_us\", \"sum\"),\n", + " (\"mean_us\", \"mean\"),\n", + " (\"median_us\", \"median\"),\n", + " (\"max_us\", \"max\"),\n", + " ]\n", + " )\n", + " .sort_values(\"total_us\", ascending=False)\n", + " )\n", + "\n", + " # Convert to more readable units\n", + " event_stats[\"total_s\"] = event_stats[\"total_us\"] / 1e6\n", + " event_stats[\"mean_ms\"] = event_stats[\"mean_us\"] / 1e3\n", + " event_stats[\"median_ms\"] = event_stats[\"median_us\"] / 1e3\n", + " event_stats[\"max_ms\"] = event_stats[\"max_us\"] / 1e3\n", + " event_stats[\"pct_total\"] = (event_stats[\"total_us\"] / total_duration_us) * 100\n", + "\n", + " print(\"Top 15 Event Types by Total Duration:\")\n", + " display(\n", + " event_stats[[\"count\", \"total_s\", \"mean_ms\", \"max_ms\", \"pct_total\"]].head(15)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Template Instantiation Analysis\n", + "\n", + "Template instantiation is often the biggest contributor to C++ metaprogramming build times. Let's analyze it in detail." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if sample_files and len(templates_df) > 0:\n", + " template_time_us = templates_df[\"dur\"].sum()\n", + " template_time_s = template_time_us / 1e6\n", + " template_pct = (template_time_us / total_duration_us) * 100\n", + "\n", + " print(\"Template Instantiation Summary:\")\n", + " print(f\" Total instantiations: {len(templates_df):,}\")\n", + " print(f\" Total time: {template_time_s:.2f}s\")\n", + " print(f\" Percentage of build time: {template_pct:.1f}%\")\n", + " print(f\" Average per instantiation: {templates_df['dur'].mean() / 1e3:.2f} ms\")\n", + " print(f\" Median per instantiation: {templates_df['dur'].median() / 1e3:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if sample_files and len(templates_df) > 0:\n", + " # Most expensive individual template instantiations\n", + " print(\"Top 10 Slowest Individual Template Instantiations:\")\n", + " slowest = templates_df.nlargest(10, \"dur\")[\n", + " [\"name\", \"dur\", \"template_detail\"]\n", + " ].copy()\n", + " slowest[\"dur_ms\"] = slowest[\"dur\"] / 1e3\n", + " display(slowest[[\"name\", \"dur_ms\", \"template_detail\"]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if sample_files and len(templates_df) > 0:\n", + " # Most frequently instantiated templates\n", + " print(\"Top 15 Most Frequently Instantiated Templates:\")\n", + " template_counts = templates_df[\"template_detail\"].value_counts().head(15)\n", + " display(\n", + " pd.DataFrame(\n", + " {\"template\": template_counts.index, \"count\": template_counts.values}\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if sample_files and len(templates_df) > 0:\n", + " # Most expensive templates by total time\n", + " print(\"Top 15 Most Expensive Templates by Total Duration:\")\n", + " template_totals = (\n", + " templates_df.groupby(\"template_detail\")[\"dur\"]\n", + " .agg(\n", + " [\n", + " (\"count\", \"count\"),\n", + " (\"total_us\", \"sum\"),\n", + " (\"mean_us\", \"mean\"),\n", + " (\"max_us\", \"max\"),\n", + " ]\n", + " )\n", + " .sort_values(\"total_us\", ascending=False)\n", + " )\n", + "\n", + " template_totals[\"total_s\"] = template_totals[\"total_us\"] / 1e6\n", + " template_totals[\"mean_ms\"] = template_totals[\"mean_us\"] / 1e3\n", + " template_totals[\"max_ms\"] = template_totals[\"max_us\"] / 1e3\n", + "\n", + " display(template_totals[[\"count\", \"total_s\", \"mean_ms\", \"max_ms\"]].head(15))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 2: Multi-File Analysis\n", + "\n", + "Now let's scale up to analyze an entire build. We'll use parallel processing to handle thousands of trace files efficiently.\n", + "\n", + "### Parallel Processing Strategy\n", + "\n", + "The key insight: **parsing is I/O bound**, so we can process multiple files in parallel using all CPU cores. The library uses:\n", + "\n", + "- `ProcessPoolExecutor` for true parallelism (not limited by Python's GIL)\n", + "- `orjson` for fast JSON parsing (1.65x faster than stdlib)\n", + "- Optimized pandas dtypes to minimize memory usage\n", + "\n", + "**Performance**: On a typical build with 4,484 trace files (~46 GB), we can parse everything in ~26 seconds." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def process_file(json_path: Path) -> tuple:\n", + " \"\"\"\n", + " Process a single trace file and return DataFrames.\n", + "\n", + " This function is designed to be called in parallel by ProcessPoolExecutor.\n", + " \"\"\"\n", + " trace_file = TraceFile.from_path(json_path)\n", + " events = TraceParser.parse(trace_file)\n", + " events_df = TraceTransformer.to_events_dataframe(events)\n", + " templates_df = TraceTransformer.to_templates_dataframe(events)\n", + "\n", + " return (\n", + " str(json_path.name),\n", + " events_df,\n", + " templates_df,\n", + " )\n", + "\n", + "\n", + "print(\"Parallel processing function defined\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Find all trace files\n", + "json_files = list(TRACE_DIR.rglob(\"*.json\"))\n", + "\n", + "if not json_files:\n", + " print(f\"No trace files found in {TRACE_DIR}\")\n", + "else:\n", + " print(f\"Found {len(json_files):,} trace files\")\n", + "\n", + " # For demonstration, you might want to limit the number of files\n", + " # Uncomment the next line to process only the first 100 files\n", + " # json_files = json_files[:100]\n", + "\n", + " total_size = sum(f.stat().st_size for f in json_files)\n", + " print(f\"Total size: {total_size / 1024**3:.2f} GB\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Processing All Files in Parallel\n", + "\n", + "This cell will process all trace files using all available CPU cores. Depending on the number of files and your system, this may take a few minutes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files:\n", + " print(f\"Processing {len(json_files):,} files with {cpu_count()} workers...\\n\")\n", + "\n", + " start_time = time.time()\n", + " all_events = []\n", + " all_templates = []\n", + " file_names = []\n", + "\n", + " # Submit all files for parallel processing\n", + " with ProcessPoolExecutor(max_workers=cpu_count()) as executor:\n", + " futures = {executor.submit(process_file, f): f for f in json_files}\n", + "\n", + " # Collect results with progress bar\n", + " if HAS_TQDM:\n", + " pbar = tqdm(total=len(json_files), desc=\"Processing\", unit=\"files\")\n", + "\n", + " for future in as_completed(futures):\n", + " file_name, events_df, templates_df = future.result()\n", + "\n", + " file_names.append(file_name)\n", + " all_events.append(events_df)\n", + " all_templates.append(templates_df)\n", + "\n", + " if HAS_TQDM:\n", + " pbar.update(1)\n", + "\n", + " if HAS_TQDM:\n", + " pbar.close()\n", + "\n", + " parse_time = time.time() - start_time\n", + " print(\n", + " f\"\\nParsing complete in {parse_time:.2f}s ({len(json_files) / parse_time:.1f} files/sec)\"\n", + " )\n", + "\n", + " # Combine all DataFrames\n", + " print(\"Combining results...\")\n", + " combine_start = time.time()\n", + "\n", + " # Filter out empty DataFrames\n", + " non_empty_events = [df for df in all_events if len(df) > 0]\n", + " non_empty_templates = [df for df in all_templates if len(df) > 0]\n", + "\n", + " all_events_df = (\n", + " pd.concat(non_empty_events, ignore_index=True)\n", + " if non_empty_events\n", + " else pd.DataFrame()\n", + " )\n", + " all_templates_df = (\n", + " pd.concat(non_empty_templates, ignore_index=True)\n", + " if non_empty_templates\n", + " else pd.DataFrame()\n", + " )\n", + "\n", + " combine_time = time.time() - combine_start\n", + " total_time = time.time() - start_time\n", + "\n", + " print(f\"Combined in {combine_time:.2f}s\")\n", + " print(f\"\\nTotal analysis time: {total_time:.2f}s\")\n", + " print(\"\\nCombined DataFrames:\")\n", + " print(f\" Events: {len(all_events_df):,} rows\")\n", + " print(f\" Templates: {len(all_templates_df):,} rows\")\n", + " print(\n", + " f\" Memory usage: {(all_events_df.memory_usage(deep=True).sum() + all_templates_df.memory_usage(deep=True).sum()) / 1024**3:.2f} GB\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build-Wide Statistics\n", + "\n", + "Now we have all compilation events in a single DataFrame. Let's analyze the entire build." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_events_df) > 0:\n", + " total_build_time_us = all_events_df[\"dur\"].sum()\n", + " total_build_time_s = total_build_time_us / 1e6\n", + " total_build_time_min = total_build_time_s / 60\n", + "\n", + " print(\"=\" * 80)\n", + " print(\"BUILD-WIDE STATISTICS\")\n", + " print(\"=\" * 80)\n", + " print(f\"Files processed: {len(json_files):,}\")\n", + " print(f\"Total events: {len(all_events_df):,}\")\n", + " print(\n", + " f\"Total build time: {total_build_time_min:.2f} minutes ({total_build_time_s:.2f} seconds)\"\n", + " )\n", + " print(f\"Average time per file: {total_build_time_s / len(json_files):.2f} seconds\")\n", + " print(\"=\" * 80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Top Event Types Across Entire Build" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_events_df) > 0:\n", + " print(\"Top 20 Event Types by Total Duration Across All Files:\")\n", + "\n", + " build_event_stats = (\n", + " all_events_df.groupby(\"name\", observed=True)[\"dur\"]\n", + " .agg(\n", + " [\n", + " (\"count\", \"count\"),\n", + " (\"total_us\", \"sum\"),\n", + " (\"mean_us\", \"mean\"),\n", + " (\"max_us\", \"max\"),\n", + " ]\n", + " )\n", + " .sort_values(\"total_us\", ascending=False)\n", + " )\n", + "\n", + " build_event_stats[\"total_s\"] = build_event_stats[\"total_us\"] / 1e6\n", + " build_event_stats[\"total_min\"] = build_event_stats[\"total_s\"] / 60\n", + " build_event_stats[\"mean_ms\"] = build_event_stats[\"mean_us\"] / 1e3\n", + " build_event_stats[\"max_ms\"] = build_event_stats[\"max_us\"] / 1e3\n", + " build_event_stats[\"pct_total\"] = (\n", + " build_event_stats[\"total_us\"] / total_build_time_us\n", + " ) * 100\n", + "\n", + " display(\n", + " build_event_stats[\n", + " [\"count\", \"total_min\", \"mean_ms\", \"max_ms\", \"pct_total\"]\n", + " ].head(20)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build-Wide Template Analysis\n", + "\n", + "This is where we can identify the most expensive templates across the entire build." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_templates_df) > 0:\n", + " total_template_time_us = all_templates_df[\"dur\"].sum()\n", + " total_template_time_s = total_template_time_us / 1e6\n", + " total_template_time_min = total_template_time_s / 60\n", + " template_pct = (total_template_time_us / total_build_time_us) * 100\n", + "\n", + " print(\"=\" * 80)\n", + " print(\"BUILD-WIDE TEMPLATE INSTANTIATION SUMMARY\")\n", + " print(\"=\" * 80)\n", + " print(f\"Total template instantiations: {len(all_templates_df):,}\")\n", + " print(\n", + " f\"Total template time: {total_template_time_min:.2f} minutes ({total_template_time_s:.2f} seconds)\"\n", + " )\n", + " print(f\"Percentage of total build time: {template_pct:.1f}%\")\n", + " print(f\"Average per instantiation: {all_templates_df['dur'].mean() / 1e3:.2f} ms\")\n", + " print(f\"Median per instantiation: {all_templates_df['dur'].median() / 1e3:.2f} ms\")\n", + " print(\"=\" * 80)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_templates_df) > 0:\n", + " print(\"Top 20 Most Expensive Templates by Total Duration:\")\n", + "\n", + " build_template_stats = (\n", + " all_templates_df.groupby(\"template_detail\")[\"dur\"]\n", + " .agg(\n", + " [\n", + " (\"count\", \"count\"),\n", + " (\"total_us\", \"sum\"),\n", + " (\"mean_us\", \"mean\"),\n", + " (\"median_us\", \"median\"),\n", + " (\"max_us\", \"max\"),\n", + " ]\n", + " )\n", + " .sort_values(\"total_us\", ascending=False)\n", + " )\n", + "\n", + " build_template_stats[\"total_s\"] = build_template_stats[\"total_us\"] / 1e6\n", + " build_template_stats[\"mean_ms\"] = build_template_stats[\"mean_us\"] / 1e3\n", + " build_template_stats[\"median_ms\"] = build_template_stats[\"median_us\"] / 1e3\n", + " build_template_stats[\"max_ms\"] = build_template_stats[\"max_us\"] / 1e3\n", + " build_template_stats[\"pct_template_time\"] = (\n", + " build_template_stats[\"total_us\"] / total_template_time_us\n", + " ) * 100\n", + "\n", + " display(\n", + " build_template_stats[\n", + " [\"count\", \"total_s\", \"mean_ms\", \"median_ms\", \"max_ms\", \"pct_template_time\"]\n", + " ].head(20)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_templates_df) > 0:\n", + " print(\"Top 20 Most Frequently Instantiated Templates:\")\n", + "\n", + " template_frequency = all_templates_df[\"template_detail\"].value_counts().head(20)\n", + " display(\n", + " pd.DataFrame(\n", + " {\n", + " \"template\": template_frequency.index,\n", + " \"instantiation_count\": template_frequency.values,\n", + " }\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 3: Advanced Analysis Patterns\n", + "\n", + "Now that we have all the data, let's explore some advanced analysis techniques.\n", + "\n", + "### Pattern 1: Finding Optimization Opportunities\n", + "\n", + "Templates that are both frequently instantiated AND expensive per instantiation are prime optimization targets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_templates_df) > 0:\n", + " # Calculate a \"priority score\" for optimization\n", + " # High count + high average time = high priority\n", + "\n", + " optimization_targets = all_templates_df.groupby(\"template_detail\")[\"dur\"].agg(\n", + " [(\"count\", \"count\"), (\"total_us\", \"sum\"), (\"mean_us\", \"mean\")]\n", + " )\n", + "\n", + " # Normalize count and mean to 0-1 range\n", + " optimization_targets[\"count_norm\"] = (\n", + " optimization_targets[\"count\"] - optimization_targets[\"count\"].min()\n", + " ) / (optimization_targets[\"count\"].max() - optimization_targets[\"count\"].min())\n", + " optimization_targets[\"mean_norm\"] = (\n", + " optimization_targets[\"mean_us\"] - optimization_targets[\"mean_us\"].min()\n", + " ) / (optimization_targets[\"mean_us\"].max() - optimization_targets[\"mean_us\"].min())\n", + "\n", + " # Priority score: weighted combination of frequency and cost\n", + " optimization_targets[\"priority_score\"] = (\n", + " 0.5 * optimization_targets[\"count_norm\"]\n", + " + 0.5 * optimization_targets[\"mean_norm\"]\n", + " )\n", + "\n", + " optimization_targets[\"total_s\"] = optimization_targets[\"total_us\"] / 1e6\n", + " optimization_targets[\"mean_ms\"] = optimization_targets[\"mean_us\"] / 1e3\n", + "\n", + " print(\"Top 15 Optimization Targets (High Frequency + High Cost):\")\n", + " display(\n", + " optimization_targets.sort_values(\"priority_score\", ascending=False)[\n", + " [\"count\", \"total_s\", \"mean_ms\", \"priority_score\"]\n", + " ].head(15)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pattern 2: Template Instantiation Distribution\n", + "\n", + "Understanding the distribution of template instantiation times can reveal outliers and patterns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_templates_df) > 0:\n", + " # Calculate percentiles\n", + " percentiles = [50, 75, 90, 95, 99, 99.9]\n", + " template_percentiles = all_templates_df[\"dur\"].quantile(\n", + " [p / 100 for p in percentiles]\n", + " )\n", + "\n", + " print(\"Template Instantiation Time Distribution:\")\n", + " print(f\"{'Percentile':<15} {'Duration (ms)':>15}\")\n", + " print(\"-\" * 30)\n", + " for p, val in zip(percentiles, template_percentiles):\n", + " print(f\"P{p:<14} {val / 1e3:>15.2f}\")\n", + "\n", + " # Count templates in different time buckets\n", + " print(\"\\nTemplate Count by Duration Bucket:\")\n", + " buckets = [\n", + " (0, 1e3, \"< 1ms\"),\n", + " (1e3, 10e3, \"1-10ms\"),\n", + " (10e3, 100e3, \"10-100ms\"),\n", + " (100e3, 1e6, \"100ms-1s\"),\n", + " (1e6, float(\"inf\"), \"> 1s\"),\n", + " ]\n", + "\n", + " for low, high, label in buckets:\n", + " count = (\n", + " (all_templates_df[\"dur\"] >= low) & (all_templates_df[\"dur\"] < high)\n", + " ).sum()\n", + " pct = (count / len(all_templates_df)) * 100\n", + " print(f\"{label:<15} {count:>10,} ({pct:>5.1f}%)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pattern 3: Event Type Breakdown by Template vs Non-Template\n", + "\n", + "Compare template-related events to other compilation activities." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_events_df) > 0:\n", + " # Identify template-related event types\n", + " template_event_types = [\n", + " \"InstantiateClass\",\n", + " \"InstantiateFunction\",\n", + " \"InstantiateVariable\",\n", + " \"InstantiateAlias\",\n", + " ]\n", + "\n", + " # Calculate time spent on template vs non-template events\n", + " template_mask = all_events_df[\"name\"].isin(template_event_types)\n", + "\n", + " template_time = all_events_df[template_mask][\"dur\"].sum()\n", + " non_template_time = all_events_df[~template_mask][\"dur\"].sum()\n", + "\n", + " print(\"Build Time Breakdown:\")\n", + " print(f\"{'Category':<30} {'Time (min)':>15} {'Percentage':>12}\")\n", + " print(\"-\" * 60)\n", + " print(\n", + " f\"{'Template Instantiation':<30} {template_time / 1e6 / 60:>15.2f} {(template_time / total_build_time_us) * 100:>11.1f}%\"\n", + " )\n", + " print(\n", + " f\"{'Other Compilation':<30} {non_template_time / 1e6 / 60:>15.2f} {(non_template_time / total_build_time_us) * 100:>11.1f}%\"\n", + " )\n", + " print(\"-\" * 60)\n", + " print(f\"{'Total':<30} {total_build_time_us / 1e6 / 60:>15.2f} {'100.0%':>12}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pattern 4: Identifying Expensive Template Patterns\n", + "\n", + "Look for common patterns in expensive templates (e.g., nested templates, specific type combinations)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if json_files and len(all_templates_df) > 0:\n", + " # Extract template base names (before '<')\n", + " all_templates_df[\"template_base\"] = (\n", + " all_templates_df[\"template_detail\"].str.split(\"<\").str[0]\n", + " )\n", + "\n", + " # Analyze by template base name\n", + " base_stats = (\n", + " all_templates_df.groupby(\"template_base\")[\"dur\"]\n", + " .agg([(\"count\", \"count\"), (\"total_us\", \"sum\"), (\"mean_us\", \"mean\")])\n", + " .sort_values(\"total_us\", ascending=False)\n", + " )\n", + "\n", + " base_stats[\"total_s\"] = base_stats[\"total_us\"] / 1e6\n", + " base_stats[\"mean_ms\"] = base_stats[\"mean_us\"] / 1e3\n", + "\n", + " print(\"Top 20 Template Base Names by Total Time:\")\n", + " display(base_stats[[\"count\", \"total_s\", \"mean_ms\"]].head(20))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 4: Practical Recommendations\n", + "\n", + "Based on the analysis above, here are practical steps for improving build times:\n", + "\n", + "### 1. Focus on High-Impact Templates\n", + "\n", + "The templates identified in the \"Optimization Targets\" section should be your first priority. Consider:\n", + "- **Explicit instantiation**: Pre-instantiate common template combinations\n", + "- **Extern templates**: Declare templates as extern to avoid redundant instantiations\n", + "- **Simplification**: Can the template be simplified or split into smaller pieces?\n", + "\n", + "### 2. Measure Progress\n", + "\n", + "After making changes:\n", + "1. Rebuild with `-ftime-trace`\n", + "2. Re-run this analysis\n", + "3. Compare the \"Top Templates\" tables to see if your changes helped\n", + "\n", + "### 3. Use This Analysis Regularly\n", + "\n", + "Make this analysis part of your development workflow:\n", + "- Run it weekly to track build time trends\n", + "- Use it to review pull requests that add new templates\n", + "- Share results with the team to raise awareness\n", + "\n", + "### 4. Consider Build System Optimizations\n", + "\n", + "Beyond code changes:\n", + "- Use precompiled headers for common includes\n", + "- Enable unity builds for related source files\n", + "- Use ccache or similar tools for incremental builds\n", + "- Consider distributed compilation (distcc, icecc)\n", + "\n", + "## Conclusion\n", + "\n", + "This notebook demonstrated how to:\n", + "\n", + "1. **Parse** Clang `-ftime-trace` files efficiently using parallel processing\n", + "2. **Transform** raw JSON into analyzable pandas DataFrames\n", + "3. **Analyze** build performance at both file and build-wide levels\n", + "4. **Identify** optimization opportunities using data-driven techniques\n", + "\n", + "The trace_analysis library makes it easy to treat C++ build performance as a big data problem, using the best tools available: pandas, parallel processing, and Jupyter notebooks.\n", + "\n", + "### Next Steps\n", + "\n", + "- Customize this notebook for your specific analysis needs\n", + "- Add visualizations using matplotlib or plotly\n", + "- Create automated reports for CI/CD pipelines\n", + "- Share insights with your team to drive build time improvements\n", + "\n", + "### Resources\n", + "\n", + "- [Clang -ftime-trace Documentation](https://releases.llvm.org/11.0.0/tools/clang/docs/ClangCommandLineReference.html#cmdoption-clang-ftime-trace)\n", + "- [Chrome Trace Event Format](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview)\n", + "- [Template Metaprogramming Performance Talk](https://www.youtube.com/watch?v=vwrXHznaYLA)\n", + "- [trace_analysis Library Documentation](../trace_analysis/README.md)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/script/analyze_build/trace_analysis/README.md b/script/analyze_build/trace_analysis/README.md new file mode 100644 index 0000000000..56fb5ef72f --- /dev/null +++ b/script/analyze_build/trace_analysis/README.md @@ -0,0 +1,235 @@ +# Trace Analysis Library + +Modern data pipeline for analyzing Clang `-ftime-trace` build performance data. Provides fast JSON parsing and conversion to pandas DataFrames for immediate analysis in scripts and Jupyter notebooks. + +## Design Principles + +1. **Modular, modern data pipeline architecture** - Clean separation between parsing, transformation, and analysis +2. **Fast path to pandas DataFrames** - Get to analyzable data structures quickly +3. **Simplicity and efficiency** - Straightforward code with optimized data structures +4. **Concurrent processing** - Leverage parallelism where it helps performance +5. **Flexible integration** - Standalone library that works well in scripts and interactive sessions + +## Quick Start + +```python +from pathlib import Path +from trace_analysis import TraceFile, TraceParser, TraceTransformer + +# Parse a trace file +trace_file = TraceFile.from_path(Path("build.json")) +events = TraceParser.parse(trace_file) + +# Convert to DataFrames +events_df = TraceTransformer.to_events_dataframe(events) +templates_df = TraceTransformer.to_templates_dataframe(events) + +# Analyze +print(f"Total events: {len(events_df):,}") +print(f"Total time: {events_df['dur'].sum() / 1e6:.2f}s") +print(f"Template time: {templates_df['dur'].sum() / 1e6:.2f}s") +``` + +## Module Overview + +### models.py - Data Models + +**TraceFile**: Lightweight metadata wrapper for trace files. + +```python +@dataclass +class TraceFile: + path: Path + size_bytes: int + mtime_ns: int + + @classmethod + def from_path(cls, path: Path) -> "TraceFile" +``` + +**Usage:** +```python +trace_file = TraceFile.from_path(Path("build.json")) +print(f"File: {trace_file.name}, Size: {trace_file.size_bytes:,} bytes") +``` + +### parser.py - JSON Parsing + +**TraceParser**: Fast JSON parsing with automatic orjson optimization. + +**Key Methods:** + +```python +@staticmethod +def parse(trace_file: TraceFile) -> List[Dict[str, Any]] + """Parse trace file and return all events.""" + +@staticmethod +def is_template_event(event: Dict[str, Any]) -> bool + """Check if event is template-related.""" + +@staticmethod +def extract_template_detail(event: Dict[str, Any]) -> str + """Extract template detail from event.""" +``` + +**Performance Notes:** +- Automatically uses `orjson` if available (1.65x faster than stdlib json) +- Gracefully falls back to standard library +- Optimized for single-line JSON format used by -ftime-trace + +**Usage:** +```python +events = TraceParser.parse(trace_file) +template_events = [e for e in events if TraceParser.is_template_event(e)] +``` + +### transformer.py - DataFrame Conversion + +**TraceTransformer**: Convert parsed events to optimized pandas DataFrames. + +**Key Methods:** + +```python +@staticmethod +def to_events_dataframe(events: List[Dict[str, Any]]) -> pd.DataFrame + """Convert all events to DataFrame with optimized dtypes.""" + +@staticmethod +def to_templates_dataframe(events: List[Dict[str, Any]]) -> pd.DataFrame + """Convert template events to DataFrame with template details.""" + +@staticmethod +def compute_file_stats(events_df: pd.DataFrame, templates_df: pd.DataFrame, + file_name: str) -> Dict[str, Any] + """Compute summary statistics for a file.""" + +@staticmethod +def aggregate_event_types(events_df: pd.DataFrame) -> pd.DataFrame + """Aggregate events by type with count, sum, mean, max.""" + +@staticmethod +def aggregate_templates(templates_df: pd.DataFrame) -> pd.DataFrame + """Aggregate template instantiations by detail.""" +``` + +**Usage:** +```python +# Convert to DataFrames +events_df = TraceTransformer.to_events_dataframe(events) +templates_df = TraceTransformer.to_templates_dataframe(events) + +# Aggregate analysis +event_summary = TraceTransformer.aggregate_event_types(events_df) +template_summary = TraceTransformer.aggregate_templates(templates_df) +``` + +## DataFrame Schemas + +> **Note**: The DataFrame schemas are evolving as analysis needs develop. The current focus is on fast intake of raw JSON data into a simple, analyzable structure. + +### Events DataFrame + +Columns and optimized dtypes: + +| Column | Type | Description | +|--------|------|-------------| +| `name` | category | Event type (e.g., "InstantiateFunction") | +| `dur` | int64 | Duration in microseconds | +| `ts` | int64 | Timestamp in microseconds | +| `pid` | int32 | Process ID | +| `tid` | int32 | Thread ID | +| `ph` | category | Phase (usually "X" for complete events) | + +**Example:** +```python +events_df.head() +# name dur ts pid tid ph +# 0 InstantiateFunction 12500 1000000 1234 1234 X +# 1 ParseClass 8300 1012500 1234 1234 X +``` + +### Templates DataFrame + +Columns for template-specific analysis: + +| Column | Type | Description | +|--------|------|-------------| +| `name` | category | Template event type | +| `dur` | int64 | Duration in microseconds | +| `template_detail` | object | Full template signature | + +**Example:** +```python +templates_df.head() +# name dur template_detail +# 0 InstantiateFunction 12500 std::vector> +# 1 InstantiateClass 8300 MyClass +``` + +## Parallel Processing Pattern + +For analyzing multiple files concurrently: + +```python +from pathlib import Path +from concurrent.futures import ProcessPoolExecutor +import pandas as pd + +def process_file(json_path: Path): + trace_file = TraceFile.from_path(json_path) + events = TraceParser.parse(trace_file) + return TraceTransformer.to_events_dataframe(events) + +# Process all files in parallel +trace_dir = Path("../../build-trace") +json_files = list(trace_dir.rglob("*.json")) + +with ProcessPoolExecutor() as executor: + dfs = list(executor.map(process_file, json_files)) + +# Combine results +all_events = pd.concat(dfs, ignore_index=True) +``` + +## Analysis Examples + +### Top Event Types by Duration + +```python +event_totals = events_df.groupby('name', observed=True)['dur'].sum() +top_events = event_totals.sort_values(ascending=False).head(10) +print(top_events / 1e6) # Convert to seconds +``` + +### Most Expensive Templates + +```python +template_totals = templates_df.groupby('template_detail')['dur'].agg(['sum', 'count', 'mean']) +expensive = template_totals.sort_values('sum', ascending=False).head(10) +print(expensive) +``` + +### Template Time Percentage + +```python +total_time = events_df['dur'].sum() +template_time = templates_df['dur'].sum() +print(f"Template instantiation: {(template_time / total_time) * 100:.1f}% of build time") +``` + +## Future Directions + +This library is actively evolving: + +- **Schema evolution**: DataFrame schemas will expand as analysis needs develop +- **Visualization helpers**: Planned addition of common visualization utilities +- **Analysis utilities**: Additional aggregation and analysis functions as patterns emerge + +The core pipeline architecture (parse → transform → analyze) will remain stable while specific schemas and utilities evolve based on practical analysis needs. + +## See Also + +- [Main README](../README.md) - Overall project documentation and usage examples +- [examples/](../examples/) - Complete analysis scripts +- [Clang -ftime-trace docs](https://releases.llvm.org/11.0.0/tools/clang/docs/ClangCommandLineReference.html#cmdoption-clang-ftime-trace) diff --git a/script/analyze_build/trace_analysis/__init__.py b/script/analyze_build/trace_analysis/__init__.py new file mode 100644 index 0000000000..267b0f0ec1 --- /dev/null +++ b/script/analyze_build/trace_analysis/__init__.py @@ -0,0 +1,39 @@ +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +""" +Trace Analysis - Simple tools for analyzing Clang -ftime-trace build data. + +This module provides straightforward, high-performance tools for parsing and +analyzing JSON trace files generated by Clang's -ftime-trace feature. + +Key components: +- TraceParser: Fast JSON parsing with orjson support +- TraceTransformer: Convert parsed events to pandas DataFrames +- TraceFile: Simple model for trace file metadata + +Example: + from pathlib import Path + from trace_analysis import TraceFile, TraceParser, TraceTransformer + + # Parse a single file + trace_file = TraceFile.from_path(Path("build.json")) + events = TraceParser.parse(trace_file) + events_df = TraceTransformer.to_events_dataframe(events) + + # Analyze template instantiations + templates_df = TraceTransformer.to_templates_dataframe(events) + print(f"Total template time: {templates_df['dur'].sum() / 1e6:.2f}s") +""" + +from trace_analysis.models import TraceFile +from trace_analysis.parser import TraceParser +from trace_analysis.transformer import TraceTransformer + +__all__ = [ + "TraceFile", + "TraceParser", + "TraceTransformer", +] + +__version__ = "2.0.0" diff --git a/script/analyze_build/trace_analysis/models.py b/script/analyze_build/trace_analysis/models.py new file mode 100644 index 0000000000..449d2e90ad --- /dev/null +++ b/script/analyze_build/trace_analysis/models.py @@ -0,0 +1,31 @@ +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +""" +Data models for trace analysis. + +Simple data structures for representing trace files. +""" + +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class TraceFile: + """Represents a trace file to be processed.""" + + path: Path + size_bytes: int + mtime_ns: int + + @property + def name(self) -> str: + """Get the filename.""" + return self.path.name + + @classmethod + def from_path(cls, path: Path) -> "TraceFile": + """Create a TraceFile from a path.""" + stat = path.stat() + return cls(path=path, size_bytes=stat.st_size, mtime_ns=stat.st_mtime_ns) diff --git a/script/analyze_build/trace_analysis/parser.py b/script/analyze_build/trace_analysis/parser.py new file mode 100644 index 0000000000..22d65f5724 --- /dev/null +++ b/script/analyze_build/trace_analysis/parser.py @@ -0,0 +1,143 @@ +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +""" +JSON parser for trace files. + +Provides streaming JSON parsing to handle large trace files efficiently. +""" + +from typing import List, Dict, Any + +try: + import orjson + + HAS_ORJSON = True +except ImportError: + import json + + HAS_ORJSON = False + +from .models import TraceFile + + +class TraceParser: + """ + Parser for Clang -ftime-trace JSON files. + + Uses streaming JSON parsing to handle large files without loading + them entirely into memory. + """ + + # Template-related event names + TEMPLATE_EVENT_NAMES = { + "InstantiateClass", + "InstantiateFunction", + "InstantiateVariable", + "ParseTemplate", + } + + @staticmethod + def parse(trace_file: TraceFile) -> List[Dict[str, Any]]: + """ + Parse a trace file and return all events. + + Args: + trace_file: TraceFile to parse + + Returns: + List of event dictionaries + + Note: + Uses orjson if available (1.65x faster than stdlib json), + otherwise falls back to standard json library. The -ftime-trace + files are single-line JSON, so we can load them efficiently. + """ + if HAS_ORJSON: + # orjson is significantly faster (1.65x) and reads bytes + with open(trace_file.path, "rb") as f: + data = orjson.loads(f.read()) + else: + # Fallback to standard library + with open(trace_file.path, "r") as f: + data = json.load(f) + + # Handle both dict format {"traceEvents": [...]} and direct list format + if isinstance(data, dict): + return data.get("traceEvents", []) + elif isinstance(data, list): + return data + else: + return [] + + @staticmethod + def parse_stream(trace_file: TraceFile): + """ + Stream events from a trace file without loading entire file. + + Args: + trace_file: TraceFile to parse + + Yields: + Individual event dictionaries + + Note: + For compatibility, this now just yields from the parsed list. + The standard json library is much faster than ijson for these files. + """ + events = TraceParser.parse(trace_file) + for event in events: + yield event + + @staticmethod + def is_template_event(event: Dict[str, Any]) -> bool: + """ + Check if an event is template-related. + + Args: + event: Event dictionary + + Returns: + True if event is template-related + """ + return event.get("name") in TraceParser.TEMPLATE_EVENT_NAMES + + @staticmethod + def extract_template_detail(event: Dict[str, Any]) -> str: + """ + Extract template detail from an event. + + Args: + event: Event dictionary + + Returns: + Template detail string, or empty string if not available + """ + args = event.get("args", {}) + return args.get("detail", "") + + @staticmethod + def get_event_duration(event: Dict[str, Any]) -> int: + """ + Get the duration of an event in microseconds. + + Args: + event: Event dictionary + + Returns: + Duration in microseconds (0 if not available) + """ + return event.get("dur", 0) + + @staticmethod + def get_event_name(event: Dict[str, Any]) -> str: + """ + Get the name of an event. + + Args: + event: Event dictionary + + Returns: + Event name (or "Unknown" if not available) + """ + return event.get("name", "Unknown") diff --git a/script/analyze_build/trace_analysis/transformer.py b/script/analyze_build/trace_analysis/transformer.py new file mode 100644 index 0000000000..ecc937a939 --- /dev/null +++ b/script/analyze_build/trace_analysis/transformer.py @@ -0,0 +1,197 @@ +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +""" +Data transformer for converting parsed events to DataFrames. + +Transforms raw event dictionaries into structured pandas DataFrames +optimized for analysis. +""" + +from typing import List, Dict, Any +import pandas as pd + +from .parser import TraceParser + + +class TraceTransformer: + """ + Transformer for converting trace events to pandas DataFrames. + + Provides efficient conversion from raw event dictionaries to + structured DataFrames optimized for analytical queries. + """ + + @staticmethod + def to_events_dataframe(events: List[Dict[str, Any]]) -> pd.DataFrame: + """ + Convert raw events to a DataFrame. + + Args: + events: List of event dictionaries + + Returns: + DataFrame with columns: name, dur, ts, pid, tid, ph, args + """ + if not events: + return pd.DataFrame(columns=["name", "dur", "ts", "pid", "tid", "ph"]) + + # Extract key fields for efficient storage + df = pd.DataFrame( + { + "name": [e.get("name", "Unknown") for e in events], + "dur": [e.get("dur", 0) for e in events], + "ts": [e.get("ts", 0) for e in events], + "pid": [e.get("pid", 0) for e in events], + "tid": [e.get("tid", 0) for e in events], + "ph": [e.get("ph", "") for e in events], + } + ) + + # Optimize dtypes for storage + df["dur"] = df["dur"].astype("int64") + df["ts"] = df["ts"].astype("int64") + df["pid"] = df["pid"].astype("int32") + df["tid"] = df["tid"].astype("int32") + df["ph"] = df["ph"].astype("category") + df["name"] = df["name"].astype("category") + + return df + + @staticmethod + def to_templates_dataframe(events: List[Dict[str, Any]]) -> pd.DataFrame: + """ + Convert template events to a DataFrame. + + Args: + events: List of event dictionaries + + Returns: + DataFrame with template-specific information + """ + # Filter for template events + template_events = [e for e in events if TraceParser.is_template_event(e)] + + if not template_events: + return pd.DataFrame(columns=["name", "dur", "template_detail"]) + + df = pd.DataFrame( + { + "name": [e.get("name", "Unknown") for e in template_events], + "dur": [e.get("dur", 0) for e in template_events], + "template_detail": [ + TraceParser.extract_template_detail(e) for e in template_events + ], + } + ) + + # Optimize dtypes + df["dur"] = df["dur"].astype("int64") + df["name"] = df["name"].astype("category") + + return df + + @staticmethod + def compute_file_stats( + events_df: pd.DataFrame, templates_df: pd.DataFrame, file_name: str + ) -> Dict[str, Any]: + """ + Compute summary statistics for a file. + + Args: + events_df: DataFrame of all events + templates_df: DataFrame of template events + file_name: Name of the file + + Returns: + Dictionary of file statistics + """ + return { + "file_name": file_name, + "total_events": len(events_df), + "total_duration_us": int(events_df["dur"].sum()) + if len(events_df) > 0 + else 0, + "template_event_count": len(templates_df), + "template_duration_us": int(templates_df["dur"].sum()) + if len(templates_df) > 0 + else 0, + "max_event_duration_us": int(events_df["dur"].max()) + if len(events_df) > 0 + else 0, + "unique_event_types": events_df["name"].nunique() + if len(events_df) > 0 + else 0, + } + + @staticmethod + def aggregate_event_types(events_df: pd.DataFrame) -> pd.DataFrame: + """ + Aggregate events by type. + + Args: + events_df: DataFrame of events + + Returns: + DataFrame with aggregated statistics per event type + """ + if len(events_df) == 0: + return pd.DataFrame( + columns=[ + "event_type", + "count", + "total_duration", + "avg_duration", + "max_duration", + ] + ) + + agg_df = ( + events_df.groupby("name", observed=True) + .agg({"dur": ["count", "sum", "mean", "max"]}) + .reset_index() + ) + + # Flatten column names + agg_df.columns = [ + "event_type", + "count", + "total_duration", + "avg_duration", + "max_duration", + ] + + # Sort by total duration + agg_df = agg_df.sort_values("total_duration", ascending=False) + + return agg_df + + @staticmethod + def aggregate_templates(templates_df: pd.DataFrame) -> pd.DataFrame: + """ + Aggregate template instantiations. + + Args: + templates_df: DataFrame of template events + + Returns: + DataFrame with aggregated template statistics + """ + if len(templates_df) == 0: + return pd.DataFrame( + columns=["template_detail", "count", "total_duration", "avg_duration"] + ) + + agg_df = ( + templates_df.groupby("template_detail") + .agg({"dur": ["count", "sum", "mean"]}) + .reset_index() + ) + + # Flatten column names + agg_df.columns = ["template_detail", "count", "total_duration", "avg_duration"] + + # Sort by count + agg_df = agg_df.sort_values("count", ascending=False) + + return agg_df