mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
This PR introduces a Python toolkit for analyzing Clang's `-ftime-trace` build performance data. This is the foundation for our systematic effort to reduce CK and CK-Tile build times (#3575).
The toolkit provides fast parsing of trace JSON files into pandas DataFrames using orjson, with specialized functions for analyzing template instantiation costs and compilation phase breakdowns. It includes a core library (`trace_analysis/`), example scripts for quick analysis, a comprehensive README with usage documentation, and an interactive Jupyter notebook demonstration.
Key features include memory-efficient DataFrame schemas with optimized dtypes, recursive hierarchical phase analysis, automatic metadata extraction (source file, compilation timing), and template instantiation filtering. The design supports both standalone scripts and interactive Jupyter notebook workflows.
This single-file analysis capability lays the groundwork for future multi-file analysis across thousands of compilation units, enabling data-driven optimization and build-time regression detection.
[ROCm/composable_kernel commit: a213ce676b]
248 lines
6.7 KiB
Plaintext
248 lines
6.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Template Instantiation Analysis Example\n",
|
|
"\n",
|
|
"This notebook demonstrates how to use the template analysis functions to understand C++ template instantiation costs in Clang's `-ftime-trace` output.\n",
|
|
"\n",
|
|
"## Setup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import sys\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"# Add parent directory to path\n",
|
|
"sys.path.insert(0, str(Path.cwd().parent))\n",
|
|
"\n",
|
|
"from trace_analysis import (\n",
|
|
" parse_file,\n",
|
|
" get_template_instantiation_events,\n",
|
|
" get_phase_breakdown,\n",
|
|
" get_metadata,\n",
|
|
")\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"from datetime import datetime\n",
|
|
"import plotly.express as px\n",
|
|
"\n",
|
|
"\n",
|
|
"# Display settings\n",
|
|
"pd.set_option(\"display.max_rows\", 100)\n",
|
|
"pd.set_option(\"display.max_columns\", None)\n",
|
|
"pd.set_option(\"display.width\", None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Load Trace File"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load your trace file\n",
|
|
"trace_file = Path(\n",
|
|
" \"../../../build-trace/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.json\"\n",
|
|
")\n",
|
|
"df = parse_file(trace_file)\n",
|
|
"\n",
|
|
"print(f\"Total events: {len(df):,}\")\n",
|
|
"starting_timestamp = datetime.fromtimestamp(df.attrs[\"beginningOfTime\"] / 1e6)\n",
|
|
"print(f\"Starting timestamp: {starting_timestamp.strftime('%Y-%m-%d:%H:%M:%S')}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"get_metadata(df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Compilation Overview"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Get phase breakdown and display it\n",
|
|
"breakdown = get_phase_breakdown(df)\n",
|
|
"print(breakdown)\n",
|
|
"display(breakdown)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Extract data for Plotly charts (sunburst, treemap, or icicle)\n",
|
|
"plotly_data = breakdown.to_plotly()\n",
|
|
"fig = px.sunburst(**plotly_data)\n",
|
|
"fig.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Template Instantiation Analysis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Get all template instantiation events, including the columns parsed from `arg_detail`\n",
|
|
"template_events = get_template_instantiation_events(df)\n",
|
|
"\n",
|
|
"print(f\"Total template instantiation events: {len(template_events):,}\")\n",
|
|
"print(f\"Total template time: {template_events['dur'].sum() / 1000:.1f} ms\")\n",
|
|
"display(template_events)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Examine Parsed Columns\n",
|
|
"\n",
|
|
"The `get_template_instantiation_events()` function automatically parses the `arg_detail` column into structured fields:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Describe the parsed columns that get_template_instantiation_events() adds\n",
|
|
"print(\"Parsed columns available:\")\n",
|
|
"print(\"- namespace: Top-level namespace (e.g., 'std', 'ck')\")\n",
|
|
"print(\"- template_name: Template name without parameters\")\n",
|
|
"print(\"- full_qualified_name: Full namespace::template_name\")\n",
|
|
"print(\"- param_count: Number of template parameters\")\n",
|
|
"print(\"- is_ck_type: Boolean indicating CK library types\")\n",
|
|
"print(\"- is_nested: Boolean indicating nested templates\")\n",
|
|
"print()\n",
|
|
"\n",
|
|
"# Display sample of parsed data\n",
|
|
"template_events[\n",
|
|
" [\"namespace\", \"template_name\", \"param_count\", \"is_ck_type\", \"is_nested\", \"dur\"]\n",
|
|
"].head(20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Analysis by Namespace"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Group by namespace to see where time is spent\n",
|
|
"namespace_summary = (\n",
|
|
" template_events.groupby(\"namespace\")\n",
|
|
" .agg({\"dur\": [\"count\", \"sum\", \"mean\"], \"param_count\": \"mean\"})\n",
|
|
" .round(2)\n",
|
|
")\n",
|
|
"\n",
|
|
"namespace_summary.columns = [\"count\", \"total_dur\", \"avg_dur\", \"avg_params\"]\n",
|
|
"namespace_summary[\"total_ms\"] = namespace_summary[\"total_dur\"] / 1000\n",
|
|
"namespace_summary = namespace_summary.sort_values(\"total_dur\", ascending=False)\n",
|
|
"\n",
|
|
"print(\"\\nTemplate Instantiation Time by Namespace:\")\n",
|
|
"display(namespace_summary)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### CK Library Templates Analysis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Filter to CK types only\n",
|
|
"ck_templates = template_events[template_events[\"is_ck_type\"]].copy()\n",
|
|
"\n",
|
|
"print(f\"CK template instantiations: {len(ck_templates):,}\")\n",
|
|
"print(f\"CK template time: {ck_templates['dur'].sum() / 1000:.1f} ms\")\n",
|
|
"print(\n",
|
|
" f\"Percentage of total template time: {100 * ck_templates['dur'].sum() / template_events['dur'].sum():.1f}%\"\n",
|
|
")\n",
|
|
"print()\n",
|
|
"\n",
|
|
"# Top CK templates by time\n",
|
|
"ck_by_name = (\n",
|
|
" ck_templates.groupby(\"template_name\")\n",
|
|
" .agg({\"dur\": [\"count\", \"sum\", \"mean\"]})\n",
|
|
" .round(2)\n",
|
|
")\n",
|
|
"ck_by_name.columns = [\"count\", \"total_dur\", \"avg_dur\"]\n",
|
|
"ck_by_name[\"total_ms\"] = ck_by_name[\"total_dur\"] / 1000\n",
|
|
"ck_by_name = ck_by_name.sort_values(\"total_dur\", ascending=False)\n",
|
|
"\n",
|
|
"print(\"\\nTop CK Templates by Total Time:\")\n",
|
|
"display(ck_by_name.head(20))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|