mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-15 10:37:44 +00:00
Merge commit '8654c0628f83261d3dd64cfb4ec80e9dd2b29fa5' into develop
This commit is contained in:
4
Jenkinsfile
vendored
4
Jenkinsfile
vendored
@@ -39,10 +39,10 @@ def sendFailureNotifications() {
|
||||
// Error patterns to scan build logs for specific failure types and send detailed notifications.
|
||||
def failurePatterns = [
|
||||
[pattern: /login attempt to .* failed with status: 401 Unauthorized/, description: "Docker registry authentication failed"],
|
||||
[pattern: /docker login failed/, description: "Docker login failed"],
|
||||
[pattern: /(.*)docker login failed(.*)/, description: "Docker login failed"],
|
||||
[pattern: /HTTP request sent .* 404 Not Found/, description: "HTTP request failed with 404"],
|
||||
[pattern: /cat: .* No such file or directory/, description: "GPU not found"],
|
||||
[pattern: /GPU not found/, description: "GPU not found"],
|
||||
[pattern: /(.*)GPU not found(.*)/, description: "GPU not found"],
|
||||
[pattern: /Could not connect to Redis at .* Connection timed out/, description: "Redis connection timed out"]
|
||||
]
|
||||
|
||||
|
||||
263
script/analyze_build/README.md
Normal file
263
script/analyze_build/README.md
Normal file
@@ -0,0 +1,263 @@
|
||||
# Build Trace Analysis
|
||||
|
||||
Simple-to-use, fast Python tools for analyzing Clang `-ftime-trace` build performance data.
|
||||
|
||||
## Overview
|
||||
|
||||
We're kicking off a systematic effort to dramatically reduce CK and CK-Tile build times, [#3575](https://github.com/ROCm/composable_kernel/issues/3575). A key part of this work is improving our C++ metaprogramming to reduce the burden on the compiler.
|
||||
|
||||
In order to prioritize work and measure our progress, we need data on template instantiation. For single files, Clang's `-ftime-trace` build performance data is easy to analyze with the Perfetto UI. The problem we are solving here is how to analyze instantiation data across thousands of compilation units.
|
||||
|
||||
The Python code in this directory provides helper functions to quickly load `-ftime-trace` JSON files into pandas DataFrames that can be used for analysis in Jupyter notebooks.
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
script/analyze_build/
|
||||
├── trace_analysis/ # Core library
|
||||
│ ├── __init__.py # Main exports
|
||||
│ ├── parse_file.py # Fast parsing of JSON trace files
|
||||
│ ├── template_analysis.py # Template instantiation analysis
|
||||
│ ├── template_parser.py # Template name parsing utilities
|
||||
│ └── phase_breakdown.py # Compilation phase breakdown
|
||||
├── notebooks/ # Jupyter notebooks for analysis
|
||||
│ └── file_analysis_example.ipynb # Template analysis example
|
||||
├── requirements.txt # Python dependencies
|
||||
└── README.md # This file
|
||||
```
|
||||
|
||||
## Python Requirements
|
||||
|
||||
See `requirements.txt` for the complete list of dependencies:
|
||||
* **pandas** - DataFrame manipulation and analysis
|
||||
* **orjson** - Fast JSON parsing for trace files
|
||||
* **plotly** - Interactive visualizations (sunburst, treemap)
|
||||
* **nbformat** - Jupyter notebook format support
|
||||
* **ipykernel** - Kernel for running notebooks in VSCode/Jupyter
|
||||
* **kaleido** - Static image export from Plotly charts
|
||||
* **jupyter** - Full Jupyter environment
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Setup
|
||||
|
||||
1. Create a virtual environment (recommended):
|
||||
```bash
|
||||
cd script/analyze_build
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Install VSCode extensions if you want to run notebooks in VSCode:
|
||||
* Jupyter
|
||||
* Data Wrangler (interact with Pandas DataFrames)
|
||||
|
||||
### Analyzing a Single File
|
||||
|
||||
Use the `parse_file` function to load a `-ftime-trace` JSON file into a Pandas DataFrame:
|
||||
|
||||
```python
|
||||
from trace_analysis import parse_file
|
||||
|
||||
# Parse the trace file
|
||||
df = parse_file('path/to/trace.json')
|
||||
|
||||
# View basic info
|
||||
print(f"Total events: {len(df)}")
|
||||
print(df.columns)
|
||||
|
||||
# Analyze duration statistics
|
||||
print(df['dur'].describe())
|
||||
```
|
||||
|
||||
### Extracting Compilation Metadata
|
||||
|
||||
Get high-level metadata about the compilation:
|
||||
|
||||
```python
|
||||
from trace_analysis import parse_file, get_metadata
|
||||
|
||||
# Extract metadata from a parsed trace DataFrame
|
||||
metadata = get_metadata(parse_file('trace.json'))
|
||||
|
||||
print(f"Source file: {metadata.source_file}")
|
||||
print(f"Compilation time: {metadata.total_wall_time_s:.2f}s")
|
||||
print(f"Started: {metadata.wall_start_datetime}")
|
||||
print(f"Ended: {metadata.wall_end_datetime}")
|
||||
```
|
||||
|
||||
The returned `FileMetadata` object exposes these attributes (among others):
|
||||
- `source_file`: Main .cpp/.c file being compiled
|
||||
- `time_granularity`: Time unit used ("microseconds")
|
||||
- `beginning_of_time`: Epoch timestamp in microseconds
|
||||
- `wall_start_time`: Wall clock start (microseconds since epoch)
|
||||
- `wall_end_time`: Wall clock end (microseconds since epoch)
|
||||
- `wall_start_datetime`: Human-readable start time
|
||||
- `wall_end_datetime`: Human-readable end time
|
||||
- `total_wall_time_us`: Total compilation time in microseconds
|
||||
- `total_wall_time_s`: Total compilation time in seconds
|
||||
|
||||
### Template Instantiation Analysis
|
||||
|
||||
The module includes specialized functions for analyzing C++ template instantiation costs:
|
||||
|
||||
```python
|
||||
from trace_analysis import (
|
||||
parse_file,
|
||||
get_template_instantiation_events,
|
||||
get_phase_breakdown,
|
||||
)
|
||||
|
||||
df = parse_file('trace.json')
|
||||
|
||||
# Get all template instantiation events with parsed template information
|
||||
template_events = get_template_instantiation_events(df)
|
||||
|
||||
# The returned DataFrame includes parsed columns:
|
||||
# - namespace: Top-level namespace (e.g., 'std', 'ck')
|
||||
# - template_name: Template name without parameters
|
||||
# - full_qualified_name: Full namespace::template_name
|
||||
# - param_count: Number of template parameters
|
||||
# - is_ck_type: Boolean indicating CK library types
|
||||
# - is_nested: Boolean indicating nested templates
|
||||
|
||||
# Find slowest template instantiations
|
||||
top_templates = template_events.nlargest(20, 'dur')
|
||||
print(top_templates[['template_name', 'namespace', 'param_count', 'dur']])
|
||||
|
||||
# Analyze by namespace
|
||||
namespace_summary = template_events.groupby('namespace').agg({
|
||||
'dur': ['count', 'sum', 'mean']
|
||||
})
|
||||
print(namespace_summary)
|
||||
```
|
||||
|
||||
### Compilation Phase Breakdown
|
||||
|
||||
Analyze how compilation time is distributed across different phases:
|
||||
|
||||
```python
|
||||
from trace_analysis import parse_file, get_phase_breakdown, PhaseBreakdown
|
||||
|
||||
df = parse_file('trace.json')
|
||||
|
||||
# Get hierarchical phase breakdown
|
||||
breakdown = get_phase_breakdown(df)
|
||||
|
||||
# Display in Jupyter (automatic rich HTML display)
|
||||
display(breakdown)
|
||||
|
||||
# Print text representation
|
||||
print(breakdown)
|
||||
|
||||
# Access the underlying DataFrame
|
||||
print(breakdown.df)
|
||||
|
||||
# Convert to plotly format for visualization
|
||||
import plotly.express as px
|
||||
data = breakdown.to_plotly()
|
||||
fig = px.sunburst(**data)
|
||||
fig.show()
|
||||
```
|
||||
|
||||
The `PhaseBreakdown` class provides:
|
||||
- Hierarchical breakdown of compilation phases
|
||||
- Automatic calculation of "Other" residual time at each level
|
||||
- Validation that children don't exceed parent durations
|
||||
- Multiple output formats (text, DataFrame, Plotly)
|
||||
|
||||
## DataFrame Schema
|
||||
|
||||
The parsed DataFrame contains the following columns from the `-ftime-trace` format:
|
||||
|
||||
- `name`: Event name (function, template instantiation, etc.)
|
||||
- `ph`: Phase character ('X' for complete, 'B' for begin, 'E' for end, 'i' for instant)
|
||||
- `ts`: Timestamp in microseconds
|
||||
- `dur`: Duration in microseconds (for complete events)
|
||||
- `pid`: Process ID
|
||||
- `tid`: Thread ID
|
||||
- `arg_*`: Flattened arguments from the event's `args` field
|
||||
|
||||
### Template Event Columns
|
||||
|
||||
When using `get_template_instantiation_events()`, additional parsed columns are included:
|
||||
|
||||
- `namespace`: Top-level namespace extracted from the template name
|
||||
- `template_name`: Template name without namespace or parameters
|
||||
- `full_qualified_name`: Complete namespace::template_name
|
||||
- `param_count`: Number of template parameters
|
||||
- `is_ck_type`: Boolean flag for CK library types (namespace starts with 'ck')
|
||||
- `is_nested`: Boolean flag indicating nested template instantiations
|
||||
|
||||
## Use in Jupyter Notebooks
|
||||
|
||||
The module is designed to work seamlessly in Jupyter notebooks. See `notebooks/file_analysis_example.ipynb` for a complete example workflow that demonstrates:
|
||||
|
||||
- Loading and parsing trace files
|
||||
- Extracting compilation metadata
|
||||
- Analyzing phase breakdown with visualizations
|
||||
- Template instantiation analysis with parsed columns
|
||||
- Filtering and grouping by namespace
|
||||
- Identifying CK-specific template costs
|
||||
|
||||
To use in a notebook:
|
||||
|
||||
```python
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add trace_analysis to path
|
||||
sys.path.insert(0, str(Path.cwd().parent))
|
||||
|
||||
from trace_analysis import (
|
||||
parse_file,
|
||||
get_metadata,
|
||||
get_template_instantiation_events,
|
||||
get_phase_breakdown,
|
||||
)
|
||||
|
||||
# Load and analyze
|
||||
df = parse_file('path/to/trace.json')
|
||||
breakdown = get_phase_breakdown(df)
|
||||
templates = get_template_instantiation_events(df)
|
||||
|
||||
# Visualize
|
||||
import plotly.express as px
|
||||
fig = px.sunburst(**breakdown.to_plotly())
|
||||
fig.show()
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
### Core Functions
|
||||
|
||||
- `parse_file(filepath)`: Parse a `-ftime-trace` JSON file into a pandas DataFrame
|
||||
- `get_metadata(df)`: Extract compilation metadata (a `FileMetadata` object) from a DataFrame returned by `parse_file`
|
||||
|
||||
### Template Analysis
|
||||
|
||||
- `get_template_instantiation_events(df)`: Filter to template instantiation events with parsed template information
|
||||
|
||||
### Phase Breakdown
|
||||
|
||||
- `get_phase_breakdown(df)`: Generate hierarchical compilation phase breakdown
|
||||
- `PhaseBreakdown`: Class representing phase breakdown with multiple output formats
|
||||
|
||||
## Contributing
|
||||
|
||||
This is an experimental project for analyzing and improving C++ metaprogramming build times. Contributions are welcome! When adding new analysis functions:
|
||||
|
||||
1. Add the function to the appropriate module in `trace_analysis/`
|
||||
2. Export it in `__init__.py`
|
||||
3. Update this README with usage examples
|
||||
4. Consider adding a notebook example if the feature is substantial
|
||||
|
||||
## License
|
||||
|
||||
Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
SPDX-License-Identifier: MIT
|
||||
247
script/analyze_build/notebooks/file_analysis_example.ipynb
Normal file
247
script/analyze_build/notebooks/file_analysis_example.ipynb
Normal file
@@ -0,0 +1,247 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Template Instantiation Analysis Example\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use the template analysis functions to understand C++ template instantiation costs in Clang's `-ftime-trace` output.\n",
|
||||
"\n",
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"# Add parent directory to path\n",
|
||||
"sys.path.insert(0, str(Path.cwd().parent))\n",
|
||||
"\n",
|
||||
"from trace_analysis import (\n",
|
||||
" parse_file,\n",
|
||||
" get_template_instantiation_events,\n",
|
||||
" get_phase_breakdown,\n",
|
||||
" get_metadata,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from datetime import datetime\n",
|
||||
"import plotly.express as px\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Display settings\n",
|
||||
"pd.set_option(\"display.max_rows\", 100)\n",
|
||||
"pd.set_option(\"display.max_columns\", None)\n",
|
||||
"pd.set_option(\"display.width\", None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load Trace File"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load your trace file\n",
|
||||
"trace_file = Path(\n",
|
||||
" \"../../../build-trace/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.json\"\n",
|
||||
")\n",
|
||||
"df = parse_file(trace_file)\n",
|
||||
"\n",
|
||||
"print(f\"Total events: {len(df):,}\")\n",
|
||||
"starting_timestamp = datetime.fromtimestamp(df.attrs[\"beginningOfTime\"] / 1e6)\n",
|
||||
"print(f\"Starting timestamp: {starting_timestamp.strftime('%Y-%m-%d:%H:%M:%S')}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"get_metadata(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Compilation Overview"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get phase breakdown and display it\n",
|
||||
"breakdown = get_phase_breakdown(df)\n",
|
||||
"print(breakdown)\n",
|
||||
"display(breakdown)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Extract data for plotly charts (sunburst, tree-map, or icicle)\n",
|
||||
"plotly_data = breakdown.to_plotly()\n",
|
||||
"fig = px.sunburst(**plotly_data)\n",
|
||||
"fig.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Template Instantiation Analysis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get all template instantiation events (now with parsed columns!)\n",
|
||||
"template_events = get_template_instantiation_events(df)\n",
|
||||
"\n",
|
||||
"print(f\"Total template instantiation events: {len(template_events):,}\")\n",
|
||||
"print(f\"Total template time: {template_events['dur'].sum() / 1000:.1f} ms\")\n",
|
||||
"display(template_events)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Examine Parsed Columns\n",
|
||||
"\n",
|
||||
"The `get_template_instantiation_events()` function automatically parses the `arg_detail` column into structured fields:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Show the new parsed columns\n",
|
||||
"print(\"Parsed columns available:\")\n",
|
||||
"print(\"- namespace: Top-level namespace (e.g., 'std', 'ck')\")\n",
|
||||
"print(\"- template_name: Template name without parameters\")\n",
|
||||
"print(\"- full_qualified_name: Full namespace::template_name\")\n",
|
||||
"print(\"- param_count: Number of template parameters\")\n",
|
||||
"print(\"- is_ck_type: Boolean indicating CK library types\")\n",
|
||||
"print(\"- is_nested: Boolean indicating nested templates\")\n",
|
||||
"print()\n",
|
||||
"\n",
|
||||
"# Display sample of parsed data\n",
|
||||
"template_events[\n",
|
||||
" [\"namespace\", \"template_name\", \"param_count\", \"is_ck_type\", \"is_nested\", \"dur\"]\n",
|
||||
"].head(20)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Analysis by Namespace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Group by namespace to see where time is spent\n",
|
||||
"namespace_summary = (\n",
|
||||
" template_events.groupby(\"namespace\")\n",
|
||||
" .agg({\"dur\": [\"count\", \"sum\", \"mean\"], \"param_count\": \"mean\"})\n",
|
||||
" .round(2)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"namespace_summary.columns = [\"count\", \"total_dur\", \"avg_dur\", \"avg_params\"]\n",
|
||||
"namespace_summary[\"total_ms\"] = namespace_summary[\"total_dur\"] / 1000\n",
|
||||
"namespace_summary = namespace_summary.sort_values(\"total_dur\", ascending=False)\n",
|
||||
"\n",
|
||||
"print(\"\\nTemplate Instantiation Time by Namespace:\")\n",
|
||||
"display(namespace_summary)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### CK Library Templates Analysis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Filter to CK types only\n",
|
||||
"ck_templates = template_events[template_events[\"is_ck_type\"]].copy()\n",
|
||||
"\n",
|
||||
"print(f\"CK template instantiations: {len(ck_templates):,}\")\n",
|
||||
"print(f\"CK template time: {ck_templates['dur'].sum() / 1000:.1f} ms\")\n",
|
||||
"print(\n",
|
||||
" f\"Percentage of total template time: {100 * ck_templates['dur'].sum() / template_events['dur'].sum():.1f}%\"\n",
|
||||
")\n",
|
||||
"print()\n",
|
||||
"\n",
|
||||
"# Top CK templates by time\n",
|
||||
"ck_by_name = (\n",
|
||||
" ck_templates.groupby(\"template_name\")\n",
|
||||
" .agg({\"dur\": [\"count\", \"sum\", \"mean\"]})\n",
|
||||
" .round(2)\n",
|
||||
")\n",
|
||||
"ck_by_name.columns = [\"count\", \"total_dur\", \"avg_dur\"]\n",
|
||||
"ck_by_name[\"total_ms\"] = ck_by_name[\"total_dur\"] / 1000\n",
|
||||
"ck_by_name = ck_by_name.sort_values(\"total_dur\", ascending=False)\n",
|
||||
"\n",
|
||||
"print(\"\\nTop CK Templates by Total Time:\")\n",
|
||||
"display(ck_by_name.head(20))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
18
script/analyze_build/requirements.txt
Normal file
18
script/analyze_build/requirements.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
# Build Trace Analysis - Python Dependencies
|
||||
|
||||
# Core data processing
|
||||
pandas>=2.0.0
|
||||
orjson>=3.9.0
|
||||
|
||||
# Jupyter notebook support
|
||||
nbformat>=4.2.0
|
||||
ipykernel>=6.0.0
|
||||
|
||||
# Interactive visualizations
|
||||
plotly>=5.0.0
|
||||
|
||||
# Static image export from Plotly
|
||||
kaleido>=0.2.0
|
||||
|
||||
# Full Jupyter environment (if not using VSCode)
|
||||
jupyter>=1.0.0
|
||||
34
script/analyze_build/trace_analysis/__init__.py
Normal file
34
script/analyze_build/trace_analysis/__init__.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""
|
||||
Build Trace Analysis - Core library for analyzing Clang -ftime-trace data.
|
||||
|
||||
This package provides tools to parse and analyze Clang's -ftime-trace JSON output
|
||||
for build performance analysis.
|
||||
"""
|
||||
|
||||
from .parse_file import (
|
||||
parse_file,
|
||||
get_metadata,
|
||||
)
|
||||
|
||||
from .template_analysis import (
|
||||
get_template_instantiation_events,
|
||||
)
|
||||
|
||||
from .phase_breakdown import (
|
||||
get_phase_breakdown,
|
||||
PhaseBreakdown,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Core parsing and filtering
|
||||
"parse_file",
|
||||
"get_metadata",
|
||||
# Template analysis
|
||||
"get_template_instantiation_events",
|
||||
# Phase breakdown
|
||||
"get_phase_breakdown",
|
||||
"PhaseBreakdown",
|
||||
]
|
||||
356
script/analyze_build/trace_analysis/parse_file.py
Normal file
356
script/analyze_build/trace_analysis/parse_file.py
Normal file
@@ -0,0 +1,356 @@
|
||||
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""
|
||||
Parse a single Clang -ftime-trace JSON file into a Pandas DataFrame.
|
||||
|
||||
This module provides fast parsing of Clang's -ftime-trace output using orjson
|
||||
for performance. The JSON file is typically a single-line array of trace events.
|
||||
"""
|
||||
|
||||
import orjson
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from typing import Union, Optional
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
# Expected schema for trace event DataFrames with optimized dtypes.
# parse_file() treats the keys of this mapping as the exact set of columns a
# parsed trace must contain (missing or extra columns raise ValueError) and
# converts each column to the dtype named here.
# The memory usage is dominated by arg detail, but we optimize each series.
TRACE_EVENT_DTYPES = {
    "pid": "int32",  # Process ID (max observed: ~2.3M, fits in int32)
    "tid": "int32",  # Thread ID (max observed: ~2.3M, fits in int32)
    "ts": "int64",  # Timestamp in microseconds (requires int64 for epoch times)
    "cat": "category",  # Category (low cardinality, use categorical)
    "ph": "category",  # Phase type (very low cardinality: X, B, E, i, etc.)
    "id": "int64",  # Event ID
    "name": "category",  # Event name (medium cardinality, use categorical)
    "dur": "int64",  # Duration in microseconds (max 10 days = 864B μs, requires int64)
    "arg_detail": "string",  # Detail string (high cardinality, keep as string)
    "arg_count": "int64",  # Argument count
    "arg_avg ms": "int64",  # Average milliseconds
    "arg_name": "category",  # Argument name (medium cardinality, use categorical)
}
|
||||
|
||||
|
||||
@dataclass
class FileMetadata:
    """
    Compilation metadata for one trace file, including derived values.

    Every field is optional except ``time_granularity``; fields that could
    not be determined stay at ``None``.

    Attributes:
        source_file: Main .cpp/.c file being compiled
        time_granularity: Time unit used in the trace ("microseconds")
        beginning_of_time: Epoch timestamp in microseconds from the JSON root
        execute_compiler_ts: Timestamp of the ExecuteCompiler event (microseconds)
        execute_compiler_dur: Duration of the ExecuteCompiler event (microseconds)
        total_wall_time_us: Total compilation time in microseconds
        total_wall_time_s: Total compilation time in seconds
        wall_start_time: Wall clock start time in microseconds since epoch
        wall_end_time: Wall clock end time in microseconds since epoch
        wall_start_datetime: Human-readable start time string
        wall_end_datetime: Human-readable end time string
    """

    source_file: Optional[str] = None
    time_granularity: str = "microseconds"
    beginning_of_time: Optional[int] = None
    execute_compiler_ts: Optional[int] = None
    execute_compiler_dur: Optional[int] = None
    total_wall_time_us: Optional[int] = None
    total_wall_time_s: Optional[float] = None
    wall_start_time: Optional[int] = None
    wall_end_time: Optional[int] = None
    wall_start_datetime: Optional[str] = None
    wall_end_datetime: Optional[str] = None

    def __repr__(self):
        # One "name = value" line per instance attribute, wrapped in
        # "ClassName(" ... ")" so the object prints as a readable block.
        lines = [f" {key} = {val!r}" for key, val in vars(self).items()]
        body = "\n".join(lines)
        return f"{type(self).__name__}(\n{body}\n)"
|
||||
|
||||
|
||||
def parse_file(filepath: Union[str, Path]) -> pd.DataFrame:
    """
    Load a Clang -ftime-trace JSON file as a typed Pandas DataFrame.

    The file may be either a bare JSON array of trace events or a JSON object
    holding the events under the "traceEvents" key. Each event's nested
    'args' dictionary is flattened into 'arg_'-prefixed columns, the resulting
    columns are checked against TRACE_EVENT_DTYPES (the set must match
    exactly), and every column is converted to the dtype listed there.

    The root-level beginningOfTime value is stored in
    df.attrs['beginningOfTime'] (None when the file is a bare array or the
    key is absent). Use get_metadata(df) for processed metadata.

    Args:
        filepath: Path to the -ftime-trace JSON file

    Returns:
        DataFrame with one row per trace event and the columns named in
        TRACE_EVENT_DTYPES.

    Raises:
        FileNotFoundError: If the file doesn't exist
        ValueError: If the parsed data is empty, has an unexpected top-level
            shape, contains no events, or its columns do not match
            TRACE_EVENT_DTYPES

    Examples:
        >>> df = parse_file('build/trace.json')
        >>> df[['name', 'dur']].head()
        >>> beginning = df.attrs.get('beginningOfTime')
    """
    filepath = Path(filepath)
    if not filepath.exists():
        raise FileNotFoundError(f"Trace file not found: {filepath}")

    # orjson parses bytes directly, so read the file in binary mode.
    with open(filepath, "rb") as handle:
        payload = orjson.loads(handle.read())

    if not payload:
        raise ValueError(f"Empty trace data in file: {filepath}")

    # Two layouts are accepted: {"traceEvents": [...]} or a direct array.
    is_object = isinstance(payload, dict)
    if is_object:
        if "traceEvents" not in payload:
            raise ValueError(
                f"Expected 'traceEvents' key in JSON object, got keys: {list(payload.keys())}"
            )
        events = payload["traceEvents"]
    elif isinstance(payload, list):
        events = payload
    else:
        raise ValueError(f"Expected JSON array or object, got {type(payload).__name__}")

    frame = pd.DataFrame(events)
    if frame.empty:
        raise ValueError(f"No trace events found in file: {filepath}")

    if "args" in frame.columns:
        frame = _flatten_args(frame)

    # Strict schema check: report missing columns first, then unexpected ones.
    expected = set(TRACE_EVENT_DTYPES.keys())
    present = set(frame.columns)

    missing = expected - present
    if missing:
        raise ValueError(
            f"Missing expected columns in trace data: {sorted(missing)}"
        )

    unexpected = present - expected
    if unexpected:
        raise ValueError(
            f"Unexpected columns found in trace data: {sorted(unexpected)}"
        )

    # Convert column by column, in the order TRACE_EVENT_DTYPES lists them.
    for column, dtype in TRACE_EVENT_DTYPES.items():
        if dtype in ("int64", "int32"):
            # Integer dtypes cannot hold NaN, so absent values become 0 first.
            frame[column] = frame[column].fillna(0).astype(dtype)
        elif dtype in ("category", "string"):
            frame[column] = frame[column].astype(dtype)
        else:
            raise ValueError(f"Unsupported dtype '{dtype}' for column '{column}'")

    # Only the object layout can carry a root-level beginningOfTime.
    frame.attrs["beginningOfTime"] = (
        payload.get("beginningOfTime") if is_object else None
    )
    return frame
|
||||
|
||||
|
||||
def _flatten_args(df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Flatten the 'args' column into separate columns with 'arg_' prefix.
|
||||
|
||||
The 'args' field in trace events contains additional metadata as a dictionary.
|
||||
This function extracts those key-value pairs into separate columns.
|
||||
|
||||
Args:
|
||||
df: DataFrame with an 'args' column containing dictionaries
|
||||
|
||||
Returns:
|
||||
DataFrame with flattened args columns and original 'args' column removed
|
||||
"""
|
||||
# Extract args into separate DataFrame
|
||||
args_data = []
|
||||
for idx, row in df.iterrows():
|
||||
args = row.get("args", {})
|
||||
if isinstance(args, dict):
|
||||
args_data.append(args)
|
||||
else:
|
||||
args_data.append({})
|
||||
|
||||
if args_data:
|
||||
args_df = pd.DataFrame(args_data)
|
||||
# Prefix all args columns with 'arg_'
|
||||
args_df.columns = [f"arg_{col}" for col in args_df.columns]
|
||||
|
||||
# Drop original args column and concatenate flattened args
|
||||
df = df.drop(columns=["args"])
|
||||
df = pd.concat([df, args_df], axis=1)
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def _normalize_source_path(file_path: str) -> str:
|
||||
"""
|
||||
Normalize a source file path to be relative to composable_kernel if present.
|
||||
|
||||
If 'composable_kernel' appears in the path, returns the path starting from
|
||||
'composable_kernel/'. Otherwise, returns the original path unchanged.
|
||||
|
||||
Args:
|
||||
file_path: Full filesystem path to a source file
|
||||
|
||||
Returns:
|
||||
Normalized path starting from composable_kernel, or original path if
|
||||
composable_kernel is not found
|
||||
|
||||
Examples:
|
||||
>>> _normalize_source_path('/home/user/composable_kernel/include/ck/tensor.hpp')
|
||||
'composable_kernel/include/ck/tensor.hpp'
|
||||
>>> _normalize_source_path('/usr/include/vector')
|
||||
'/usr/include/vector'
|
||||
"""
|
||||
path = Path(file_path)
|
||||
parts = path.parts
|
||||
|
||||
# Find the last occurrence of 'composable_kernel' in the path
|
||||
for i in range(len(parts) - 1, -1, -1):
|
||||
if parts[i] == "composable_kernel":
|
||||
# Return path from composable_kernel onwards
|
||||
return str(Path(*parts[i:]))
|
||||
|
||||
# If composable_kernel not found, return original path
|
||||
return file_path
|
||||
|
||||
|
||||
def get_metadata(df: pd.DataFrame) -> FileMetadata:
    """
    Extract and process compilation metadata from a DataFrame.

    Reads ``beginningOfTime`` from ``df.attrs``, the first ``ExecuteCompiler``
    event, and the first ``ParseDeclarationOrFunctionDefinition`` event whose
    ``arg_detail`` names a C/C++ source file, then derives wall-clock fields
    from them. Any piece of information that is not present in the DataFrame
    simply leaves the corresponding ``FileMetadata`` field untouched.

    Args:
        df: DataFrame returned by parse_file() with beginningOfTime in its .attrs

    Returns:
        FileMetadata instance on which this function may set:
        - beginning_of_time: value of ``df.attrs["beginningOfTime"]``
        - execute_compiler_ts: ``ts`` of the first ExecuteCompiler event
        - execute_compiler_dur: ``dur`` of the first ExecuteCompiler event
        - source_file: first .cpp/.cc/.cxx/.c path found, normalized with
          _normalize_source_path()
        - total_wall_time_us: same value as execute_compiler_dur
        - total_wall_time_s: execute_compiler_dur divided by 1,000,000
        - wall_start_time: beginning_of_time + execute_compiler_ts
        - wall_end_time: wall_start_time + execute_compiler_dur
        - wall_start_datetime / wall_end_datetime: the two wall times
          formatted as "YYYY-MM-DD HH:MM:SS.mmm"

    Examples:
        >>> df = parse_file('trace.json')
        >>> metadata = get_metadata(df)
        >>> print(f"Compiled: {metadata.source_file}")
        >>> print(f"Duration: {metadata.total_wall_time_s:.2f}s")
        >>> print(f"Started: {metadata.wall_start_datetime}")
    """
    # Extract beginningOfTime from DataFrame attributes
    beginning_of_time = None
    if hasattr(df, "attrs"):
        beginning_of_time = df.attrs.get("beginningOfTime")

    # Initialize metadata with beginningOfTime from JSON structure
    metadata = FileMetadata(beginning_of_time=beginning_of_time)

    # Process events to extract ExecuteCompiler timing information
    if "name" in df.columns:
        execute_compiler = df[df["name"] == "ExecuteCompiler"]
        if not execute_compiler.empty:
            # Only the first ExecuteCompiler event is used
            event = execute_compiler.iloc[0]
            if "ts" in event:
                metadata.execute_compiler_ts = event["ts"]
            if "dur" in event:
                metadata.execute_compiler_dur = event["dur"]

    # Process events to find the main source file being compiled
    if "name" in df.columns and "arg_detail" in df.columns:
        source_extensions = (".cpp", ".cc", ".cxx", ".c")
        parse_events = df[df["name"] == "ParseDeclarationOrFunctionDefinition"]

        for _, event in parse_events.iterrows():
            detail = event.get("arg_detail", "")
            if detail:
                # Keep only the text before the first ':' (drops line:column info)
                file_path = str(detail).split(":")[0]

                # Headers are skipped; the first real source file wins
                if file_path.endswith(source_extensions):
                    metadata.source_file = _normalize_source_path(file_path)
                    break

    # Compute derived fields
    if metadata.execute_compiler_dur is not None:
        metadata.total_wall_time_us = metadata.execute_compiler_dur
        metadata.total_wall_time_s = metadata.execute_compiler_dur / 1_000_000.0

    # Calculate wall clock times if we have the necessary data
    if (
        metadata.beginning_of_time is not None
        and metadata.execute_compiler_ts is not None
        and metadata.execute_compiler_dur is not None
    ):
        metadata.wall_start_time = (
            metadata.beginning_of_time + metadata.execute_compiler_ts
        )
        metadata.wall_end_time = (
            metadata.wall_start_time + metadata.execute_compiler_dur
        )

        # Convert to human-readable datetime strings (millisecond precision)
        try:
            start_dt = datetime.fromtimestamp(metadata.wall_start_time / 1_000_000.0)
            end_dt = datetime.fromtimestamp(metadata.wall_end_time / 1_000_000.0)
            metadata.wall_start_datetime = start_dt.strftime("%Y-%m-%d %H:%M:%S.%f")[
                :-3
            ]
            metadata.wall_end_datetime = end_dt.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        except (OSError, OverflowError, ValueError):
            # Best effort: fromtimestamp() raises OverflowError (not only
            # OSError/ValueError) for timestamps outside the platform's
            # supported range, so a corrupt trace must not abort the analysis.
            # The formatted fields are simply left unset.
            pass

    return metadata
|
||||
354
script/analyze_build/trace_analysis/phase_breakdown.py
Normal file
354
script/analyze_build/trace_analysis/phase_breakdown.py
Normal file
@@ -0,0 +1,354 @@
|
||||
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""
|
||||
Phase breakdown analysis for Clang -ftime-trace data.
|
||||
|
||||
This module provides hierarchical breakdown of compilation phases using
|
||||
the pre-aggregated "Total" events from Clang's -ftime-trace output.
|
||||
|
||||
The data is returned as a PhaseBreakdown object with rich display and
|
||||
analysis capabilities optimized for Jupyter notebooks.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from collections import namedtuple
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Lightweight record yielded when iterating over a PhaseBreakdown
Phase = namedtuple("Phase", ["name", "depth", "duration", "duration_ms", "percentage"])


class PhaseBreakdown:
    """
    Wrapper for compilation phase breakdown with notebook-friendly API.

    Holds a DataFrame with one row per phase (columns ``name``, ``parent``,
    ``depth``, ``duration``; durations in microseconds) and offers display
    and export helpers on top of it.

    Examples:
        >>> breakdown = get_phase_breakdown(df)
        >>>
        >>> # Display in Jupyter
        >>> breakdown
        >>>
        >>> # Get metrics
        >>> print(f"Total: {breakdown.total_ms:.1f}ms")
        >>>
        >>> # Formatted table
        >>> breakdown.to_dataframe()
        >>>
        >>> # Visualization
        >>> import plotly.express as px
        >>> data = breakdown.to_plotly()
        >>> fig = px.sunburst(**data)
        >>> fig.show()
        >>>
        >>> # Iteration
        >>> for phase in breakdown:
        >>>     print(f"{phase.name}: {phase.duration_ms:.1f}ms")
    """

    def __init__(self, df: pd.DataFrame):
        """
        Initialize from phase breakdown DataFrame.

        Args:
            df: DataFrame with columns name, parent, depth, duration.
                An empty DataFrame is replaced by an empty frame that has
                exactly those columns.
        """
        if df.empty:
            self._df = pd.DataFrame(columns=["name", "parent", "depth", "duration"])
            self._total_time = 0
        else:
            self._df = df
            self._total_time = self._get_total_time()

    def __repr__(self) -> str:
        """Simple text representation for console."""
        if self._df.empty:
            return "PhaseBreakdown(empty)"
        n_phases = len(self._df)
        # _total_time is stored in microseconds; convert before labelling as ms
        return f"PhaseBreakdown({n_phases} phases, {self.total_ms:.1f}ms total)"

    def _repr_html_(self) -> str:
        """Rich HTML representation for Jupyter notebooks."""
        if self._df.empty:
            return "<div><i>PhaseBreakdown(empty)</i></div>"
        return self.to_dataframe()._repr_html_()

    def __iter__(self):
        """
        Iterate over the phases in DataFrame row order.

        Yields:
            Phase namedtuples (name, depth, duration, duration_ms, percentage).
            ``duration`` is in microseconds; ``percentage`` is relative to the
            depth-0 total and is 0.0 when that total is zero.
        """
        columns = zip(self._df["name"], self._df["depth"], self._df["duration"])
        for name, depth, duration in columns:
            if self._total_time > 0:
                percentage = duration / self._total_time * 100
            else:
                percentage = 0.0
            yield Phase(name, depth, duration, duration / 1000.0, percentage)

    @property
    def df(self) -> pd.DataFrame:
        """
        Access underlying DataFrame.

        Returns:
            DataFrame with columns name, parent, depth, duration
        """
        return self._df

    @property
    def total_ms(self) -> float:
        """Duration of the depth-0 phase in milliseconds (0.0 if there is none)."""
        return self._total_time / 1000.0

    def to_dataframe(self, show_percentages: bool = True) -> pd.DataFrame:
        """
        Format as DataFrame for display.

        Args:
            show_percentages: Include a "% of Total" column. The column is
                omitted when the total time is zero, because no percentage
                can be computed.

        Returns:
            DataFrame with columns Name, Parent, Depth, Duration (ms) and,
            optionally, % of Total. Empty when there are no phases.
        """
        return self._format_dataframe(show_percentages)

    def to_plotly(self) -> dict:
        """
        Convert to plotly hierarchical visualization format.

        The returned dictionary can be unpacked directly into
        plotly.express sunburst, treemap, or icicle charts.

        Returns:
            Dictionary with keys: data_frame, names, parents, values,
            branchvalues

        Example:
            >>> data = breakdown.to_plotly()
            >>> import plotly.express as px
            >>>
            >>> # Create sunburst chart
            >>> fig = px.sunburst(**data)
            >>> fig.show()
            >>>
            >>> # Create treemap chart
            >>> fig = px.treemap(**data)
            >>> fig.show()
            >>>
            >>> # Create icicle chart
            >>> fig = px.icicle(**data)
            >>> fig.show()
        """
        return self._build_plotly_data()

    # Internal helper methods

    def _get_total_time(self) -> int:
        """Return the duration (microseconds) of the first depth-0 row, or 0."""
        root = self._df[self._df["depth"] == 0]
        if root.empty:
            return 0
        return int(root.iloc[0]["duration"])

    def _format_dataframe(self, show_percentages: bool) -> pd.DataFrame:
        """Format phase breakdown as DataFrame."""
        if self._df.empty:
            return pd.DataFrame()

        display_rows = []
        for _, row in self._df.iterrows():
            duration_ms = row["duration"] / 1000.0
            display_row = {
                "Name": row["name"],
                "Parent": row["parent"] if row["parent"] else "(root)",
                "Depth": row["depth"],
                "Duration (ms)": duration_ms,
            }
            if show_percentages and self._total_time > 0:
                pct = row["duration"] / self._total_time * 100
                display_row["% of Total"] = pct
            display_rows.append(display_row)

        display_df = pd.DataFrame(display_rows)

        # The column only exists when percentages were requested AND the total
        # is non-zero; rounding unconditionally would raise KeyError otherwise.
        if "% of Total" in display_df.columns:
            display_df["% of Total"] = display_df["% of Total"].round(1)

        return display_df

    def _build_plotly_data(self) -> dict:
        """Build the keyword arguments for plotly.express hierarchy charts."""
        return {
            "data_frame": self._df,
            "names": "name",
            "parents": "parent",
            "values": "duration",
            "branchvalues": "total",
        }
|
||||
|
||||
|
||||
# Hierarchical phase specification
# There are over 100 totals in the JSON file, but a lot of them overlap.
# If the children total more than their parent, we will throw a ValueError.
#
# The hierarchy is specified as a nested dictionary where:
# - Keys are phase names (matching "Total <name>" events in the trace)
# - Values are dictionaries of child phases (or empty dict {} for leaf nodes)
# - An empty-string key "" is skipped by get_phase_breakdown(); it is not
#   needed to request a residual. For every node that lists children, a
#   "<name>_Other" row is added whenever the node's duration exceeds the
#   sum of its direct children.
#
# This structure supports arbitrary nesting depth.
PHASE_HIERARCHY = {
    "ExecuteCompiler": {
        "Frontend": {
            "InstantiateFunction": {},
        },
        "Backend": {
            "Optimizer": {},
            "CodeGenPasses": {},
        },
    }
}
|
||||
|
||||
|
||||
def get_phase_breakdown(df: pd.DataFrame) -> PhaseBreakdown:
    """
    Build the hierarchical compilation-phase breakdown for one trace.

    Durations are taken from the pre-aggregated "Total <phase>" events and
    are kept in microseconds. The tree described by PHASE_HIERARCHY is walked
    recursively starting at "ExecuteCompiler"; every node becomes one row
    (name, parent, depth, duration), and every node with children gets an
    additional "<name>_Other" row when its children do not account for all of
    its time.

    Args:
        df: DataFrame from parse_file(); must have 'name' and 'dur' columns

    Returns:
        PhaseBreakdown wrapping the generated rows

    Raises:
        ValueError: If the 'name' or 'dur' column is missing, if a phase named
            in PHASE_HIERARCHY has no "Total" event, or if the direct children
            of a phase add up to more than the phase itself

    Examples:
        >>> df = parse_file('trace.json')
        >>> breakdown = get_phase_breakdown(df)
        >>>
        >>> # Display in Jupyter (automatic)
        >>> breakdown
        >>>
        >>> # Formatted table
        >>> breakdown.to_dataframe()
        >>>
        >>> # Visualize
        >>> import plotly.express as px
        >>> data = breakdown.to_plotly()
        >>> fig = px.sunburst(**data)
        >>> fig.show()
    """
    if not ("name" in df.columns and "dur" in df.columns):
        raise ValueError("DataFrame missing required 'name' or 'dur' columns")

    # Keep only the "Total ..." rows and index them by bare phase name
    is_total = df["name"].str.startswith("Total ", na=False)
    total_events = df[is_total].copy()
    total_events["phase"] = total_events["name"].str.removeprefix("Total ")

    def get_duration(phase_name: str) -> Optional[int]:
        """Return the microsecond duration of the first matching Total event."""
        hits = total_events[total_events["phase"] == phase_name]
        return None if hits.empty else int(hits.iloc[0]["dur"])

    def process_node(
        node_name: str,
        parent_name: str,
        depth: int,
        children_spec: dict,
    ) -> list[dict]:
        """
        Produce the rows for one phase and, recursively, its descendants.

        Args:
            node_name: Phase to emit
            parent_name: Name of the enclosing phase ("" for the root)
            depth: Nesting level of this phase (root is 0)
            children_spec: Mapping of child phase name to its own children

        Returns:
            Rows for this node followed by the rows of all descendants

        Raises:
            ValueError: If the phase is missing or its children exceed it
        """
        node_duration = get_duration(node_name)
        if node_duration is None:
            raise ValueError(f"No Total {node_name} event found in trace")

        collected = [
            {
                "name": node_name,
                "parent": parent_name,
                "depth": depth,
                "duration": node_duration,
            }
        ]

        # Leaf: nothing to descend into and no residual to compute
        if not children_spec:
            return collected

        children_total = 0
        for child_name, child_spec in children_spec.items():
            # "" entries carry no phase of their own; the residual below covers them
            if child_name != "":
                subtree = process_node(child_name, node_name, depth + 1, child_spec)
                collected.extend(subtree)
                # subtree[0] is the child itself; deeper rows must not be double counted
                children_total += subtree[0]["duration"]

        other_duration = node_duration - children_total
        if other_duration < 0:
            raise ValueError(
                f"{node_name} children total ({children_total}) "
                f"exceeds parent total ({node_duration})"
            )

        if other_duration > 0:
            collected.append(
                {
                    "name": f"{node_name}_Other",
                    "parent": node_name,
                    "depth": depth + 1,
                    "duration": other_duration,
                }
            )

        return collected

    root_name = "ExecuteCompiler"
    if root_name not in PHASE_HIERARCHY:
        raise ValueError(f"Root phase '{root_name}' not found in PHASE_HIERARCHY")

    # The root has no parent ("") and sits at depth 0
    all_rows = process_node(root_name, "", 0, PHASE_HIERARCHY[root_name])
    return PhaseBreakdown(pd.DataFrame(all_rows))
|
||||
80
script/analyze_build/trace_analysis/template_analysis.py
Normal file
80
script/analyze_build/trace_analysis/template_analysis.py
Normal file
@@ -0,0 +1,80 @@
|
||||
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""
|
||||
Template instantiation analysis for Clang -ftime-trace data.
|
||||
|
||||
This module provides specialized functions for analyzing C++ template
|
||||
instantiation costs from Clang's -ftime-trace output.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from .template_parser import parse_template_detail
|
||||
|
||||
|
||||
def get_template_instantiation_events(df: pd.DataFrame) -> pd.DataFrame:
    """
    Filter to template instantiation events and parse arg_detail into structured columns.

    Returns events for:
    - InstantiateFunction: Function template instantiations
    - InstantiateClass: Class template instantiations

    Bookkeeping columns (arg_avg ms, arg_count, arg_name, cat, id, ph, pid,
    tid) are removed when present. The returned DataFrame always includes
    the parsed columns produced by parse_template_detail(), even when no
    event matches:
    - namespace: Top-level namespace (e.g., 'std', 'ck')
    - template_name: Template name without parameters
    - full_qualified_name: Full namespace::template_name
    - param_count: Number of template parameters
    - is_ck_type: Boolean indicating if this is a CK library type
    - is_nested: Boolean indicating if contains nested templates

    Args:
        df: DataFrame from parse_file(); must have 'name' and 'arg_detail'
            columns

    Returns:
        Filtered DataFrame (index reset to 0..n-1) containing template
        instantiation events with parsed columns

    Example:
        >>> df = parse_file('trace.json')
        >>> templates = get_template_instantiation_events(df)
        >>> templates.sort_values('dur', ascending=False).head(10)
        >>> # Filter to CK types only
        >>> ck_templates = templates[templates['is_ck_type']]
        >>> # Group by template name
        >>> templates.groupby('template_name')['dur'].sum()
    """
    instantiation_events = ["InstantiateClass", "InstantiateFunction"]
    bookkeeping_columns = [
        "arg_avg ms",
        "arg_count",
        "arg_name",
        "cat",
        "id",
        "ph",
        "pid",
        "tid",
    ]

    filtered_df = (
        df[df["name"].isin(instantiation_events)]
        # errors="ignore": not every trace produces all of these columns
        # (an arg_* column only exists if some event carried that arg key),
        # and a missing one must not abort the analysis with a KeyError.
        .drop(columns=bookkeeping_columns, errors="ignore")
        .reset_index(drop=True)
    )

    # Parse arg_detail into one dict per event
    parsed_data = filtered_df["arg_detail"].apply(parse_template_detail)

    # Passing the column names explicitly keeps the parsed columns in the
    # result even when there are zero events; the parser's result for an
    # empty string carries the full set of keys.
    parsed_df = pd.DataFrame(
        parsed_data.tolist(),
        columns=list(parse_template_detail("")),
        index=filtered_df.index,
    )

    return pd.concat([filtered_df, parsed_df], axis=1)
|
||||
301
script/analyze_build/trace_analysis/template_parser.py
Normal file
301
script/analyze_build/trace_analysis/template_parser.py
Normal file
@@ -0,0 +1,301 @@
|
||||
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""
|
||||
Template detail string parser for C++ template instantiations.
|
||||
|
||||
This module provides functions to parse the arg_detail strings from
|
||||
Clang's -ftime-trace output into structured components.
|
||||
"""
|
||||
|
||||
import re
from typing import Any, Dict
|
||||
|
||||
|
||||
def parse_template_detail(detail_str: str) -> Dict[str, Any]:
    """
    Parse a template detail string into structured components.

    Args:
        detail_str: The arg_detail string from -ftime-trace. Anything that
            is not a non-empty string (None, NaN, pd.NA, "") yields the
            default result.

    Returns:
        Dictionary with parsed fields:
        - namespace: Top-level namespace (e.g., 'std', 'ck')
        - template_name: Template name without parameters
        - full_qualified_name: Full namespace::template_name
        - param_count: Number of template parameters
        - is_ck_type: Boolean indicating if this is a CK library type
        - is_nested: Boolean indicating if contains nested templates

    Example:
        >>> parse_template_detail('std::basic_string<char>')
        {
            'namespace': 'std',
            'template_name': 'basic_string',
            'full_qualified_name': 'std::basic_string',
            'param_count': 1,
            'is_ck_type': False,
            'is_nested': False
        }
    """
    # Check the type first: truth-testing some missing-value markers
    # (e.g. pd.NA) raises instead of returning False.
    if not isinstance(detail_str, str) or not detail_str:
        return _empty_result()

    # Remove surrounding quotes if present
    detail_str = detail_str.strip('"')

    return {
        "namespace": extract_namespace(detail_str),
        "template_name": extract_template_name(detail_str),
        "full_qualified_name": extract_full_qualified_name(detail_str),
        "param_count": count_template_params(detail_str),
        "is_ck_type": is_ck_template(detail_str),
        "is_nested": is_nested_template(detail_str),
    }
|
||||
|
||||
|
||||
def extract_namespace(detail_str: str) -> str:
    """
    Return the leading identifier of a template detail string.

    For a qualified name this is the top-level namespace. For a name with no
    ``::`` qualifier the leading identifier itself is returned. Surrounding
    double quotes are ignored.

    Args:
        detail_str: The template detail string

    Returns:
        The leading identifier, or an empty string when the input is empty
        or does not start with an identifier

    Example:
        >>> extract_namespace('std::basic_string<char>')
        'std'
        >>> extract_namespace('ck::tensor_operation::device::DeviceConv2d<...>')
        'ck'
        >>> extract_namespace('vector<int>')
        'vector'
    """
    if not detail_str:
        return ""

    unquoted = detail_str.strip('"')

    # Try the qualified form first, then fall back to a bare identifier.
    patterns = (
        r"^([a-zA-Z_][a-zA-Z0-9_]*)::",
        r"^([a-zA-Z_][a-zA-Z0-9_]*)",
    )
    for pattern in patterns:
        found = re.match(pattern, unquoted)
        if found:
            return found.group(1)

    return ""
|
||||
|
||||
|
||||
def extract_template_name(detail_str: str) -> str:
    """
    Extract the template name without namespace or parameters.

    Only the text before the first ``<`` is considered, so qualified names
    that appear inside the template arguments never leak into the result.

    Args:
        detail_str: The template detail string

    Returns:
        The last ``::``-separated component before the first ``<`` (or the
        end of the string), or an empty string if that component is not a
        plain identifier

    Example:
        >>> extract_template_name('std::basic_string<char>')
        'basic_string'
        >>> extract_template_name('ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3<...>')
        'GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3'
        >>> extract_template_name('tuple<std::pair<int, int>>')
        'tuple'
    """
    if not detail_str:
        return ""

    # Remove quotes
    detail_str = detail_str.strip('"')

    # Cut at the first '<' so the arguments are not searched; searching the
    # whole string would report 'pair' for 'tuple<std::pair<int, int>>'.
    head = detail_str.split("<", 1)[0].rstrip()

    # Last component of a (possibly nested) qualified name
    candidate = head.rsplit("::", 1)[-1]

    if re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", candidate):
        return candidate

    return ""
|
||||
|
||||
|
||||
def extract_full_qualified_name(detail_str: str) -> str:
    """
    Return the qualified name that precedes the template argument list.

    The string must begin with a run of identifier characters and colons
    that is followed (after optional whitespace) by ``<`` or the end of the
    string; that run is the result. Surrounding double quotes are ignored.

    Args:
        detail_str: The template detail string

    Returns:
        The qualified name without template parameters, or an empty string
        when the input is empty or does not have that shape

    Example:
        >>> extract_full_qualified_name('std::basic_string<char>')
        'std::basic_string'
        >>> extract_full_qualified_name('ck::tensor_operation::device::DeviceConv2d<...>')
        'ck::tensor_operation::device::DeviceConv2d'
    """
    if not detail_str:
        return ""

    unquoted = detail_str.strip('"')

    # Anchored at the start: everything up to the first '<' (or the end)
    found = re.match(r"^([a-zA-Z_:][a-zA-Z0-9_:]*)\s*(?:<|$)", unquoted)
    return found.group(1) if found else ""
|
||||
|
||||
|
||||
def count_template_params(detail_str: str) -> int:
    """
    Count the number of top-level template parameters.

    Only the first ``<...>`` list in the string is examined. Commas are
    counted when they sit directly inside that list: commas inside nested
    template brackets or inside ``()``, ``[]`` or ``{}`` (function types,
    array bounds, expressions) belong to a single argument and are ignored.
    An argument list with no content (``Foo<>``) has zero parameters.

    Args:
        detail_str: The template detail string

    Returns:
        Number of template parameters, or 0 if not a template

    Example:
        >>> count_template_params('std::basic_string<char>')
        1
        >>> count_template_params('std::tuple<int, float, double>')
        3
        >>> count_template_params('std::function<void(int, int)>')
        1
        >>> count_template_params('Foo<>')
        0
    """
    if not detail_str or "<" not in detail_str:
        return 0

    # Remove quotes
    detail_str = detail_str.strip('"')

    start = detail_str.find("<")

    angle_depth = 1  # the opening '<' at `start` has already been consumed
    group_depth = 0  # nesting inside (), [] and {}
    top_level_commas = 0
    saw_argument = False  # any non-blank text inside the outermost <...>
    previous = "<"

    for char in detail_str[start + 1 :]:
        if char in "([{":
            group_depth += 1
        elif char in ")]}":
            # Clamp so an unbalanced closer cannot hide later commas
            group_depth = max(group_depth - 1, 0)
        elif group_depth == 0:
            if char == "<":
                angle_depth += 1
            elif char == ">" and previous != "-":
                # '->' is an arrow, not a closing template bracket
                angle_depth -= 1
                if angle_depth == 0:
                    # We've closed the outermost template
                    break
            elif char == "," and angle_depth == 1:
                top_level_commas += 1

        if not char.isspace():
            saw_argument = True
        previous = char

    return top_level_commas + 1 if saw_argument else 0
|
||||
|
||||
|
||||
def is_ck_template(detail_str: str) -> bool:
    """
    Tell whether a template detail string refers to the ``ck`` namespace.

    The string qualifies when, after removing surrounding double quotes, it
    begins with ``ck::`` or contains ``::ck::`` anywhere.

    Args:
        detail_str: The template detail string

    Returns:
        True if the string names something in ``ck``, False otherwise
        (including for an empty string)

    Example:
        >>> is_ck_template('ck::tensor_operation::device::DeviceConv2d<...>')
        True
        >>> is_ck_template('std::basic_string<char>')
        False
    """
    if not detail_str:
        return False

    unquoted = detail_str.strip('"')

    if unquoted.startswith("ck::"):
        return True
    return "::ck::" in unquoted
|
||||
|
||||
|
||||
def is_nested_template(detail_str: str) -> bool:
    """
    Tell whether the first template argument list opens another one.

    Starting right after the first ``<`` in the string, the next angle
    bracket decides the answer: a second ``<`` means a nested template, a
    ``>`` means the list closed without nesting. Only literal angle brackets
    are inspected; a type alias that hides a template is not detected.

    Args:
        detail_str: The template detail string

    Returns:
        True if a ``<`` appears inside the first ``<...>`` list before it
        closes, False otherwise

    Example:
        >>> is_nested_template('std::vector<int>')
        False
        >>> is_nested_template('std::vector<std::pair<int, int>>')
        True
    """
    if not detail_str or "<" not in detail_str:
        return False

    unquoted = detail_str.strip('"')

    opening = unquoted.find("<")
    if opening == -1:
        return False

    # Whichever bracket comes first after the opening one settles it
    for symbol in unquoted[opening + 1 :]:
        if symbol == "<":
            return True
        if symbol == ">":
            return False

    return False
|
||||
|
||||
|
||||
def _empty_result() -> Dict[str, any]:
|
||||
"""Return an empty result dictionary with default values."""
|
||||
return {
|
||||
"namespace": "",
|
||||
"template_name": "",
|
||||
"full_qualified_name": "",
|
||||
"param_count": 0,
|
||||
"is_ck_type": False,
|
||||
"is_nested": False,
|
||||
}
|
||||
Reference in New Issue
Block a user