mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
Extends build time analysis from ROCm/composable_kernel#3644 to handle multiple trace files across build directories (see #4229):

- pipeline.py: Generic pipeline framework with a fluent interface for composable data processing. Provides parallel processing, progress tracking, and error handling independent of trace-specific code. Processes thousands of trace files at default resolution in minutes, aggregating results into in-memory DataFrames for analysis.
- parse_build.py: Parse all trace files in a build directory
- build_analysis_example.ipynb: Demonstrates pipeline aggregation across all build files

The pipeline design improves capability (composable operations), performance (parallel processing), and user-friendliness (fluent API) of the analysis modules. It enables analyzing compilation patterns across the entire codebase with all trace data available in pandas DataFrames for interactive exploration.

---

🔁 Imported from [ROCm/composable_kernel#3704](https://github.com/ROCm/composable_kernel/pull/3704)
🧑‍💻 Originally authored by @shumway

Co-authored-by: John Shumway <jshumway@amd.com>
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
252 lines
6.8 KiB
Plaintext
252 lines
6.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Template Instantiation Analysis Example\n",
|
|
"\n",
|
|
"This notebook demonstrates how to use the `trace_analysis` functions to understand C++ template instantiation costs recorded in a single Clang `-ftime-trace` JSON file.\n",
|
|
"\n",
|
|
"## Setup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%load_ext autoreload\n",
|
|
"%autoreload 2\n",
|
|
"\n",
|
|
"import sys\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"# Add the parent of the current working directory to sys.path (presumably where trace_analysis lives)\n",
|
|
"sys.path.insert(0, str(Path.cwd().parent))\n",
|
|
"\n",
|
|
"from trace_analysis import (\n",
|
|
" parse_file,\n",
|
|
" get_template_instantiation_events,\n",
|
|
" get_phase_breakdown,\n",
|
|
" get_metadata,\n",
|
|
")\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"from datetime import datetime\n",
|
|
"import plotly.express as px\n",
|
|
"\n",
|
|
"\n",
|
|
"# Display settings\n",
|
|
"pd.set_option(\"display.max_rows\", 100)\n",
|
|
"pd.set_option(\"display.max_columns\", None)\n",
|
|
"pd.set_option(\"display.width\", None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Load Trace File"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
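|
"# Path to one Clang -ftime-trace JSON file, relative to the current working directory; point it at a trace from your own build\n",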
|
|
"trace_file = Path(\n",
|
|
" \"../../../build-trace/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.json\"\n",
|
|
")\n",
|
|
"df = parse_file(trace_file)\n",
|
|
"\n",
|
|
"print(f\"Total events: {len(df):,}\")\n",
|
|
"starting_timestamp = datetime.fromtimestamp(df.attrs[\"beginningOfTime\"] / 1e6)\n",
|
|
"print(f\"Starting timestamp: {starting_timestamp.strftime('%Y-%m-%d:%H:%M:%S')}\")\n",
|
|
"print(f\"Source file: {df.attrs['sourceFile']}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"get_metadata(df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Compilation Overview"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Get phase breakdown and display it\n",
|
|
"breakdown = get_phase_breakdown(df)\n",
|
|
"print(breakdown)\n",
|
|
"display(breakdown)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Extract data for plotly charts (sunburst, treemap, or icicle)\n",
|
|
"plotly_data = breakdown.to_plotly()\n",
|
|
"fig = px.sunburst(**plotly_data)\n",
|
|
"fig.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Template Instantiation Analysis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Get all template instantiation events, including the columns parsed from arg_detail\n",
|
|
"template_events = get_template_instantiation_events(df)\n",
|
|
"\n",
|
|
"print(f\"Total template instantiation events: {len(template_events):,}\")\n",
|
|
"print(f\"Total template time: {template_events['dur'].sum() / 1000:.1f} ms\")\n",
|
|
"display(template_events)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Examine Parsed Columns\n",
|
|
"\n",
|
|
"The `get_template_instantiation_events()` function automatically parses the `arg_detail` column into structured fields:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Describe the parsed columns\n",
|
|
"print(\"Parsed columns available:\")\n",
|
|
"print(\"- namespace: Top-level namespace (e.g., 'std', 'ck')\")\n",
|
|
"print(\"- template_name: Template name without parameters\")\n",
|
|
"print(\"- full_qualified_name: Full namespace::template_name\")\n",
|
|
"print(\"- param_count: Number of template parameters\")\n",
|
|
"print(\"- is_ck_type: Boolean indicating CK library types\")\n",
|
|
"print(\"- is_nested: Boolean indicating nested templates\")\n",
|
|
"print()\n",
|
|
"\n",
|
|
"# Display sample of parsed data\n",
|
|
"template_events[\n",
|
|
" [\"namespace\", \"template_name\", \"param_count\", \"is_ck_type\", \"is_nested\", \"dur\"]\n",
|
|
"].head(20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Analysis by Namespace"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Group by namespace to see where time is spent\n",
|
|
"namespace_summary = (\n",
|
|
" template_events.groupby(\"namespace\")\n",
|
|
" .agg({\"dur\": [\"count\", \"sum\", \"mean\"], \"param_count\": \"mean\"})\n",
|
|
" .round(2)\n",
|
|
")\n",
|
|
"\n",
|
|
"namespace_summary.columns = [\"count\", \"total_dur\", \"avg_dur\", \"avg_params\"]\n",
|
|
"namespace_summary[\"total_ms\"] = namespace_summary[\"total_dur\"] / 1000\n",
|
|
"namespace_summary = namespace_summary.sort_values(\"total_dur\", ascending=False)\n",
|
|
"\n",
|
|
"print(\"\\nTemplate Instantiation Time by Namespace:\")\n",
|
|
"display(namespace_summary)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### CK Library Templates Analysis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Filter to CK types only\n",
|
|
"ck_templates = template_events[template_events[\"is_ck_type\"]].copy()\n",
|
|
"\n",
|
|
"print(f\"CK template instantiations: {len(ck_templates):,}\")\n",
|
|
"print(f\"CK template time: {ck_templates['dur'].sum() / 1000:.1f} ms\")\n",
|
|
"print(\n",
|
|
" f\"Percentage of total template time: {100 * ck_templates['dur'].sum() / template_events['dur'].sum():.1f}%\"\n",
|
|
")\n",
|
|
"print()\n",
|
|
"\n",
|
|
"# Top CK templates by time\n",
|
|
"ck_by_name = (\n",
|
|
" ck_templates.groupby(\"template_name\")\n",
|
|
" .agg({\"dur\": [\"count\", \"sum\", \"mean\"]})\n",
|
|
" .round(2)\n",
|
|
")\n",
|
|
"ck_by_name.columns = [\"count\", \"total_dur\", \"avg_dur\"]\n",
|
|
"ck_by_name[\"total_ms\"] = ck_by_name[\"total_dur\"] / 1000\n",
|
|
"ck_by_name = ck_by_name.sort_values(\"total_dur\", ascending=False)\n",
|
|
"\n",
|
|
"print(\"\\nTop CK Templates by Total Time:\")\n",
|
|
"display(ck_by_name.head(20))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|