mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
Add multi-file trace parsing and analysis pipeline. Extends build time analysis from ROCm/composable_kernel#3644 to handle multiple trace files across build directories (see #4229): - pipeline.py: Generic pipeline framework with fluent interface for composable data processing. Provides parallel processing, progress tracking, and error handling independent of trace-specific code. Processes thousands of trace files at default resolution in minutes, aggregating results into in-memory DataFrames for analysis. - parse_build.py: Parse all trace files in a build directory - build_analysis_example.ipynb: Demonstrates pipeline aggregation across all build files The pipeline design improves capability (composable operations), performance (parallel processing), and user-friendliness (fluent API) of the analysis modules. It enables analyzing compilation patterns across the entire codebase with all trace data available in pandas DataFrames for interactive exploration.
252 lines
6.8 KiB
Plaintext
252 lines
6.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Template Instantiation Analysis Example\n",
|
|
"\n",
|
|
"This notebook demonstrates how to use the template analysis functions to understand C++ template instantiation costs in Clang's `-ftime-trace` output.\n",
|
|
"\n",
|
|
"## Setup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%load_ext autoreload\n",
|
|
"%autoreload 2\n",
|
|
"\n",
|
|
"import sys\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"# Add parent directory to path\n",
|
|
"sys.path.insert(0, str(Path.cwd().parent))\n",
|
|
"\n",
|
|
"from trace_analysis import (\n",
|
|
" parse_file,\n",
|
|
" get_template_instantiation_events,\n",
|
|
" get_phase_breakdown,\n",
|
|
" get_metadata,\n",
|
|
")\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"from datetime import datetime\n",
|
|
"import plotly.express as px\n",
|
|
"\n",
|
|
"\n",
|
|
"# Display settings\n",
|
|
"pd.set_option(\"display.max_rows\", 100)\n",
|
|
"pd.set_option(\"display.max_columns\", None)\n",
|
|
"pd.set_option(\"display.width\", None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Load Trace File"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Path to the Clang -ftime-trace JSON to inspect; point this at a file in your own build tree\n",
|
|
"trace_file = Path(\n",
|
|
"    \"../../../build-trace/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.json\"\n",
|
|
")\n",
|
|
"df = parse_file(trace_file)\n",
|
|
"\n",
|
|
"print(f\"Total events: {len(df):,}\")\n",
|
|
"# beginningOfTime is divided by 1e6 so that datetime.fromtimestamp receives seconds\n",
|
|
"starting_timestamp = datetime.fromtimestamp(df.attrs[\"beginningOfTime\"] / 1e6)\n",
|
|
"print(f\"Starting timestamp: {starting_timestamp:%Y-%m-%d:%H:%M:%S}\")\n",
|
|
"print(f\"Source file: {df.attrs['sourceFile']}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Show what get_metadata returns for the parsed trace as the cell output\n",
|
|
"get_metadata(df)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Compilation Overview"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Compute the compilation phase breakdown and render it once via rich display\n",
|
|
"# (a separate print() of the same object would only repeat it as plain text)\n",
|
|
"breakdown = get_phase_breakdown(df)\n",
|
|
"display(breakdown)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Build the keyword arguments for a plotly express hierarchy chart\n",
|
|
"# (sunburst is drawn here; tree-map or icicle are the alternatives)\n",
|
|
"plotly_data = breakdown.to_plotly()\n",
|
|
"fig = px.sunburst(**plotly_data)\n",
|
|
"fig.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Template Instantiation Analysis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Collect the template instantiation events, including their parsed columns\n",
|
|
"template_events = get_template_instantiation_events(df)\n",
|
|
"\n",
|
|
"print(f\"Total template instantiation events: {len(template_events):,}\")\n",
|
|
"# Summed 'dur' is divided by 1000 to report milliseconds\n",
|
|
"template_total_us = template_events[\"dur\"].sum()\n",
|
|
"print(f\"Total template time: {template_total_us / 1000:.1f} ms\")\n",
|
|
"display(template_events)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Examine Parsed Columns\n",
|
|
"\n",
|
|
"The `get_template_instantiation_events()` function automatically parses the `arg_detail` column into structured fields:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Describe the parsed columns, one line of output per entry\n",
|
|
"for note in (\n",
|
|
"    \"Parsed columns available:\",\n",
|
|
"    \"- namespace: Top-level namespace (e.g., 'std', 'ck')\",\n",
|
|
"    \"- template_name: Template name without parameters\",\n",
|
|
"    \"- full_qualified_name: Full namespace::template_name\",\n",
|
|
"    \"- param_count: Number of template parameters\",\n",
|
|
"    \"- is_ck_type: Boolean indicating CK library types\",\n",
|
|
"    \"- is_nested: Boolean indicating nested templates\",\n",
|
|
"):\n",
|
|
"    print(note)\n",
|
|
"print()\n",
|
|
"\n",
|
|
"# Preview the parsed fields (plus duration) for the first 20 events\n",
|
|
"parsed_columns = [\n",
|
|
"    \"namespace\",\n",
|
|
"    \"template_name\",\n",
|
|
"    \"param_count\",\n",
|
|
"    \"is_ck_type\",\n",
|
|
"    \"is_nested\",\n",
|
|
"    \"dur\",\n",
|
|
"]\n",
|
|
"template_events[parsed_columns].head(20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Analysis by Namespace"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Per-namespace event count, total/mean duration and mean parameter count,\n",
|
|
"# ordered so the namespaces with the largest total duration come first\n",
|
|
"namespace_summary = (\n",
|
|
"    template_events.groupby(\"namespace\")\n",
|
|
"    .agg(\n",
|
|
"        count=(\"dur\", \"count\"),\n",
|
|
"        total_dur=(\"dur\", \"sum\"),\n",
|
|
"        avg_dur=(\"dur\", \"mean\"),\n",
|
|
"        avg_params=(\"param_count\", \"mean\"),\n",
|
|
"    )\n",
|
|
"    .round(2)\n",
|
|
"    .assign(total_ms=lambda summary: summary[\"total_dur\"] / 1000)\n",
|
|
"    .sort_values(\"total_dur\", ascending=False)\n",
|
|
")\n",
|
|
"\n",
|
|
"print(\"\\nTemplate Instantiation Time by Namespace:\")\n",
|
|
"display(namespace_summary)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### CK Library Templates Analysis"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Keep only the events flagged as CK library types\n",
|
|
"ck_templates = template_events[template_events[\"is_ck_type\"]].copy()\n",
|
|
"\n",
|
|
"ck_total_us = ck_templates[\"dur\"].sum()\n",
|
|
"template_total_us = template_events[\"dur\"].sum()\n",
|
|
"# Guard the share so a trace with no template time reports 0.0% instead of nan\n",
|
|
"ck_share = 100 * ck_total_us / template_total_us if template_total_us else 0.0\n",
|
|
"\n",
|
|
"print(f\"CK template instantiations: {len(ck_templates):,}\")\n",
|
|
"print(f\"CK template time: {ck_total_us / 1000:.1f} ms\")\n",
|
|
"print(f\"Percentage of total template time: {ck_share:.1f}%\")\n",
|
|
"print()\n",
|
|
"\n",
|
|
"# Rank CK templates by the total duration of their instantiation events\n",
|
|
"ck_by_name = (\n",
|
|
"    ck_templates.groupby(\"template_name\")\n",
|
|
"    .agg(\n",
|
|
"        count=(\"dur\", \"count\"),\n",
|
|
"        total_dur=(\"dur\", \"sum\"),\n",
|
|
"        avg_dur=(\"dur\", \"mean\"),\n",
|
|
"    )\n",
|
|
"    .round(2)\n",
|
|
"    .assign(total_ms=lambda summary: summary[\"total_dur\"] / 1000)\n",
|
|
"    .sort_values(\"total_dur\", ascending=False)\n",
|
|
")\n",
|
|
"\n",
|
|
"print(\"\\nTop CK Templates by Total Time:\")\n",
|
|
"display(ck_by_name.head(20))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|