Merge commit '8654c0628f83261d3dd64cfb4ec80e9dd2b29fa5' into develop

2026-05-15 10:37:44 +00:00 · 2026-01-26 22:14:16 +00:00
parent 63dde06485
commit 65be39bfd1
9 changed files with 1655 additions and 2 deletions
--- a/4
+++ b/4
@@ -39,10 +39,10 @@ def sendFailureNotifications() {
    // Error patterns to scan build logs for specific failure types and send detailed notifications.
    def failurePatterns = [
        [pattern: /login attempt to .* failed with status: 401 Unauthorized/, description: "Docker registry authentication failed"],
-        [pattern: /docker login failed/, description: "Docker login failed"],
+        [pattern: /(.*)docker login failed(.*)/, description: "Docker login failed"],
        [pattern: /HTTP request sent .* 404 Not Found/, description: "HTTP request failed with 404"],
        [pattern: /cat: .* No such file or directory/, description: "GPU not found"],
-        [pattern: /GPU not found/, description: "GPU not found"],
+        [pattern: /(.*)GPU not found(.*)/, description: "GPU not found"],
        [pattern: /Could not connect to Redis at .* Connection timed out/, description: "Redis connection timed out"]
    ]
    
--- a/script/analyze_build/README.md
+++ b/script/analyze_build/README.md
@@ -0,0 +1,263 @@
+# Build Trace Analysis
+
+Simple-to-use, fast Python tools for analyzing Clang `-ftime-trace` build performance data.
+
+## Overview
+
+We're kicking off a systematic effort to dramatically reduce CK and CK-Tile build times, [#3575](https://github.com/ROCm/composable_kernel/issues/3575). A key part of this work is improving our C++ metaprogramming to reduce the burden on the compiler.
+
+In order to prioritize work and measure our progress, we need data on template instantiation. For single files, Clang's `-ftime-trace` build performance data is easy to analyze with the Perfetto UI. The problem we are solving here is how to analyze instantiation data across thousands of compilation units.
+
+The Python code in this directory provides helper functions to quickly load JSON files into pandas DataFrames that can be used for analysis in Jupyter notebooks.
+
+## Directory Structure
+
+```
+script/analyze_build/
+├── trace_analysis/              # Core library
+│   ├── __init__.py              # Main exports
+│   ├── parse_file.py            # Fast parsing of JSON trace files
+│   ├── template_analysis.py     # Template instantiation analysis
+│   ├── template_parser.py       # Template name parsing utilities
+│   └── phase_breakdown.py       # Compilation phase breakdown
+├── notebooks/                   # Jupyter notebooks for analysis
+│   └── file_analysis_example.ipynb  # Template analysis example
+├── requirements.txt             # Python dependencies
+└── README.md                    # This file
+```
+
+## Python Requirements
+
+See `requirements.txt` for the complete list of dependencies:
+* **pandas** - DataFrame manipulation and analysis
+* **orjson** - Fast JSON parsing for trace files
+* **plotly** - Interactive visualizations (sunburst, treemap)
+* **nbformat** - Jupyter notebook format support
+* **ipykernel** - Kernel for running notebooks in VSCode/Jupyter
+* **kaleido** - Static image export from Plotly charts
+* **jupyter** - Full Jupyter environment
+
+## Quick Start
+
+### Setup
+
+1. Create a virtual environment (recommended):
+```bash
+cd script/analyze_build
+python3 -m venv .venv
+source .venv/bin/activate  # On Windows: .venv\Scripts\activate
+```
+
+2. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+3. Install VSCode extensions if you want to run notebooks in VSCode:
+   * Jupyter
+   * Data Wrangler (interact with Pandas DataFrames)
+
+### Analyzing a Single File
+
+Use the `parse_file` function to load a `-ftime-trace` JSON file into a Pandas DataFrame:
+
+```python
+from trace_analysis import parse_file
+
+# Parse the trace file
+df = parse_file('path/to/trace.json')
+
+# View basic info
+print(f"Total events: {len(df)}")
+print(df.columns)
+
+# Analyze duration statistics
+print(df['dur'].describe())
+```
+
+### Extracting Compilation Metadata
+
+Get high-level metadata about the compilation:
+
+```python
+from trace_analysis import parse_file, get_metadata
+
+# Extract metadata from a parsed trace DataFrame (get_metadata takes a DataFrame, not a path)
+metadata = get_metadata(parse_file('trace.json'))
+
+print(f"Source file: {metadata.source_file}")
+print(f"Compilation time: {metadata.total_wall_time_s:.2f}s")
+print(f"Started: {metadata.wall_start_datetime}")
+print(f"Ended: {metadata.wall_end_datetime}")
+```
+
+The returned `FileMetadata` object exposes these attributes (among others):
+- `source_file`: Main .cpp/.c file being compiled
+- `time_granularity`: Time unit used ("microseconds")
+- `beginning_of_time`: Epoch timestamp in microseconds
+- `wall_start_time`: Wall clock start (microseconds since epoch)
+- `wall_end_time`: Wall clock end (microseconds since epoch)
+- `wall_start_datetime`: Human-readable start time
+- `wall_end_datetime`: Human-readable end time
+- `total_wall_time_us`: Total compilation time in microseconds
+- `total_wall_time_s`: Total compilation time in seconds
+
+### Template Instantiation Analysis
+
+The module includes specialized functions for analyzing C++ template instantiation costs:
+
+```python
+from trace_analysis import (
+    parse_file,
+    get_template_instantiation_events,
+    get_phase_breakdown,
+)
+
+df = parse_file('trace.json')
+
+# Get all template instantiation events with parsed template information
+template_events = get_template_instantiation_events(df)
+
+# The returned DataFrame includes parsed columns:
+# - namespace: Top-level namespace (e.g., 'std', 'ck')
+# - template_name: Template name without parameters
+# - full_qualified_name: Full namespace::template_name
+# - param_count: Number of template parameters
+# - is_ck_type: Boolean indicating CK library types
+# - is_nested: Boolean indicating nested templates
+
+# Find slowest template instantiations
+top_templates = template_events.nlargest(20, 'dur')
+print(top_templates[['template_name', 'namespace', 'param_count', 'dur']])
+
+# Analyze by namespace
+namespace_summary = template_events.groupby('namespace').agg({
+    'dur': ['count', 'sum', 'mean']
+})
+print(namespace_summary)
+```
+
+### Compilation Phase Breakdown
+
+Analyze how compilation time is distributed across different phases:
+
+```python
+from trace_analysis import parse_file, get_phase_breakdown, PhaseBreakdown
+
+df = parse_file('trace.json')
+
+# Get hierarchical phase breakdown
+breakdown = get_phase_breakdown(df)
+
+# Display in Jupyter (automatic rich HTML display)
+display(breakdown)
+
+# Print text representation
+print(breakdown)
+
+# Access the underlying DataFrame
+print(breakdown.df)
+
+# Convert to plotly format for visualization
+import plotly.express as px
+data = breakdown.to_plotly()
+fig = px.sunburst(**data)
+fig.show()
+```
+
+The `PhaseBreakdown` class provides:
+- Hierarchical breakdown of compilation phases
+- Automatic calculation of "Other" residual time at each level
+- Validation that children don't exceed parent durations
+- Multiple output formats (text, DataFrame, Plotly)
+
+## DataFrame Schema
+
+The parsed DataFrame contains the following columns from the `-ftime-trace` format:
+
+- `name`: Event name (function, template instantiation, etc.)
+- `ph`: Phase character ('X' for complete, 'B' for begin, 'E' for end, 'i' for instant)
+- `ts`: Timestamp in microseconds
+- `dur`: Duration in microseconds (for complete events)
+- `pid`: Process ID
+- `tid`: Thread ID
+- `arg_*`: Flattened arguments from the event's `args` field
+
+### Template Event Columns
+
+When using `get_template_instantiation_events()`, additional parsed columns are included:
+
+- `namespace`: Top-level namespace extracted from the template name
+- `template_name`: Template name without namespace or parameters
+- `full_qualified_name`: Complete namespace::template_name
+- `param_count`: Number of template parameters
+- `is_ck_type`: Boolean flag for CK library types (namespace starts with 'ck')
+- `is_nested`: Boolean flag indicating nested template instantiations
+
+## Use in Jupyter Notebooks
+
+The module is designed to work seamlessly in Jupyter notebooks. See `notebooks/file_analysis_example.ipynb` for a complete example workflow that demonstrates:
+
+- Loading and parsing trace files
+- Extracting compilation metadata
+- Analyzing phase breakdown with visualizations
+- Template instantiation analysis with parsed columns
+- Filtering and grouping by namespace
+- Identifying CK-specific template costs
+
+To use in a notebook:
+
+```python
+import sys
+from pathlib import Path
+
+# Add trace_analysis to path
+sys.path.insert(0, str(Path.cwd().parent))
+
+from trace_analysis import (
+    parse_file,
+    get_metadata,
+    get_template_instantiation_events,
+    get_phase_breakdown,
+)
+
+# Load and analyze
+df = parse_file('path/to/trace.json')
+breakdown = get_phase_breakdown(df)
+templates = get_template_instantiation_events(df)
+
+# Visualize
+import plotly.express as px
+fig = px.sunburst(**breakdown.to_plotly())
+fig.show()
+```
+
+## API Reference
+
+### Core Functions
+
+- `parse_file(filepath)`: Parse a `-ftime-trace` JSON file into a pandas DataFrame
+- `get_metadata(df)`: Extract compilation metadata (as a `FileMetadata` object) from a DataFrame returned by `parse_file`
+
+### Template Analysis
+
+- `get_template_instantiation_events(df)`: Filter to template instantiation events with parsed template information
+
+### Phase Breakdown
+
+- `get_phase_breakdown(df)`: Generate hierarchical compilation phase breakdown
+- `PhaseBreakdown`: Class representing phase breakdown with multiple output formats
+
+## Contributing
+
+This is an experimental project for analyzing and improving C++ metaprogramming build times. Contributions are welcome! When adding new analysis functions:
+
+1. Add the function to the appropriate module in `trace_analysis/`
+2. Export it in `__init__.py`
+3. Update this README with usage examples
+4. Consider adding a notebook example if the feature is substantial
+
+## License
+
+Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+SPDX-License-Identifier: MIT
--- a/script/analyze_build/notebooks/file_analysis_example.ipynb
+++ b/script/analyze_build/notebooks/file_analysis_example.ipynb
@@ -0,0 +1,247 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Template Instantiation Analysis Example\n",
+    "\n",
+    "This notebook demonstrates how to use the template analysis functions to understand C++ template instantiation costs in Clang's `-ftime-trace` output.\n",
+    "\n",
+    "## Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# Add parent directory to path\n",
+    "sys.path.insert(0, str(Path.cwd().parent))\n",
+    "\n",
+    "from trace_analysis import (\n",
+    "    parse_file,\n",
+    "    get_template_instantiation_events,\n",
+    "    get_phase_breakdown,\n",
+    "    get_metadata,\n",
+    ")\n",
+    "\n",
+    "import pandas as pd\n",
+    "from datetime import datetime\n",
+    "import plotly.express as px\n",
+    "\n",
+    "\n",
+    "# Display settings\n",
+    "pd.set_option(\"display.max_rows\", 100)\n",
+    "pd.set_option(\"display.max_columns\", None)\n",
+    "pd.set_option(\"display.width\", None)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Trace File"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load your trace file\n",
+    "trace_file = Path(\n",
+    "    \"../../../build-trace/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.json\"\n",
+    ")\n",
+    "df = parse_file(trace_file)\n",
+    "\n",
+    "print(f\"Total events: {len(df):,}\")\n",
+    "starting_timestamp = datetime.fromtimestamp(df.attrs[\"beginningOfTime\"] / 1e6)\n",
+    "print(f\"Starting timestamp: {starting_timestamp.strftime('%Y-%m-%d:%H:%M:%S')}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "get_metadata(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compilation Overview"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get phase breakdown and display it\n",
+    "breakdown = get_phase_breakdown(df)\n",
+    "print(breakdown)\n",
+    "display(breakdown)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extract data for plotly charts (sunburst, tree-map, or icicle)\n",
+    "plotly_data = breakdown.to_plotly()\n",
+    "fig = px.sunburst(**plotly_data)\n",
+    "fig.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Template Instantiation Analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get all template instantiation events (now with parsed columns!)\n",
+    "template_events = get_template_instantiation_events(df)\n",
+    "\n",
+    "print(f\"Total template instantiation events: {len(template_events):,}\")\n",
+    "print(f\"Total template time: {template_events['dur'].sum() / 1000:.1f} ms\")\n",
+    "display(template_events)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Examine Parsed Columns\n",
+    "\n",
+    "The `get_template_instantiation_events()` function automatically parses the `arg_detail` column into structured fields:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show the new parsed columns\n",
+    "print(\"Parsed columns available:\")\n",
+    "print(\"- namespace: Top-level namespace (e.g., 'std', 'ck')\")\n",
+    "print(\"- template_name: Template name without parameters\")\n",
+    "print(\"- full_qualified_name: Full namespace::template_name\")\n",
+    "print(\"- param_count: Number of template parameters\")\n",
+    "print(\"- is_ck_type: Boolean indicating CK library types\")\n",
+    "print(\"- is_nested: Boolean indicating nested templates\")\n",
+    "print()\n",
+    "\n",
+    "# Display sample of parsed data\n",
+    "template_events[\n",
+    "    [\"namespace\", \"template_name\", \"param_count\", \"is_ck_type\", \"is_nested\", \"dur\"]\n",
+    "].head(20)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Analysis by Namespace"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Group by namespace to see where time is spent\n",
+    "namespace_summary = (\n",
+    "    template_events.groupby(\"namespace\")\n",
+    "    .agg({\"dur\": [\"count\", \"sum\", \"mean\"], \"param_count\": \"mean\"})\n",
+    "    .round(2)\n",
+    ")\n",
+    "\n",
+    "namespace_summary.columns = [\"count\", \"total_dur\", \"avg_dur\", \"avg_params\"]\n",
+    "namespace_summary[\"total_ms\"] = namespace_summary[\"total_dur\"] / 1000\n",
+    "namespace_summary = namespace_summary.sort_values(\"total_dur\", ascending=False)\n",
+    "\n",
+    "print(\"\\nTemplate Instantiation Time by Namespace:\")\n",
+    "display(namespace_summary)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### CK Library Templates Analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Filter to CK types only\n",
+    "ck_templates = template_events[template_events[\"is_ck_type\"]].copy()\n",
+    "\n",
+    "print(f\"CK template instantiations: {len(ck_templates):,}\")\n",
+    "print(f\"CK template time: {ck_templates['dur'].sum() / 1000:.1f} ms\")\n",
+    "print(\n",
+    "    f\"Percentage of total template time: {100 * ck_templates['dur'].sum() / template_events['dur'].sum():.1f}%\"\n",
+    ")\n",
+    "print()\n",
+    "\n",
+    "# Top CK templates by time\n",
+    "ck_by_name = (\n",
+    "    ck_templates.groupby(\"template_name\")\n",
+    "    .agg({\"dur\": [\"count\", \"sum\", \"mean\"]})\n",
+    "    .round(2)\n",
+    ")\n",
+    "ck_by_name.columns = [\"count\", \"total_dur\", \"avg_dur\"]\n",
+    "ck_by_name[\"total_ms\"] = ck_by_name[\"total_dur\"] / 1000\n",
+    "ck_by_name = ck_by_name.sort_values(\"total_dur\", ascending=False)\n",
+    "\n",
+    "print(\"\\nTop CK Templates by Total Time:\")\n",
+    "display(ck_by_name.head(20))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/script/analyze_build/requirements.txt
+++ b/script/analyze_build/requirements.txt
@@ -0,0 +1,18 @@
+# Build Trace Analysis - Python Dependencies
+
+# Core data processing
+pandas>=2.0.0
+orjson>=3.9.0
+
+# Jupyter notebook support
+nbformat>=4.2.0
+ipykernel>=6.0.0
+
+# Interactive visualizations
+plotly>=5.0.0
+
+# Static image export from Plotly
+kaleido>=0.2.0
+
+# Full Jupyter environment (if not using VSCode)
+jupyter>=1.0.0
--- a/script/analyze_build/trace_analysis/init.py
+++ b/script/analyze_build/trace_analysis/init.py
@@ -0,0 +1,34 @@
+# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+# SPDX-License-Identifier: MIT
+
+"""
+Build Trace Analysis - Core library for analyzing Clang -ftime-trace data.
+
+This package provides tools to parse and analyze Clang's -ftime-trace JSON output
+for build performance analysis.
+"""
+
+from .parse_file import (
+    parse_file,
+    get_metadata,
+)
+
+from .template_analysis import (
+    get_template_instantiation_events,
+)
+
+from .phase_breakdown import (
+    get_phase_breakdown,
+    PhaseBreakdown,
+)
+
+__all__ = [
+    # Core parsing and filtering
+    "parse_file",
+    "get_metadata",
+    # Template analysis
+    "get_template_instantiation_events",
+    # Phase breakdown
+    "get_phase_breakdown",
+    "PhaseBreakdown",
+]
--- a/script/analyze_build/trace_analysis/parse_file.py
+++ b/script/analyze_build/trace_analysis/parse_file.py
@@ -0,0 +1,356 @@
+# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+# SPDX-License-Identifier: MIT
+
+"""
+Parse a single Clang -ftime-trace JSON file into a Pandas DataFrame.
+
+This module provides fast parsing of Clang's -ftime-trace output using orjson
+for performance. The JSON file is typically a single-line array of trace events.
+"""
+
+import orjson
+import pandas as pd
+from pathlib import Path
+from typing import Union, Optional
+from datetime import datetime
+from dataclasses import dataclass
+
+
+# Expected schema for trace event DataFrames with optimized dtypes
+# This enforces strict column validation and memory-efficient types
+# The memory usage is dominated by arg detail, but we optimize each series.
+TRACE_EVENT_DTYPES = {
+    "pid": "int32",  # Process ID (max observed: ~2.3M, fits in int32)
+    "tid": "int32",  # Thread ID (max observed: ~2.3M, fits in int32)
+    "ts": "int64",  # Timestamp in microseconds (requires int64 for epoch times)
+    "cat": "category",  # Category (low cardinality, use categorical)
+    "ph": "category",  # Phase type (very low cardinality: X, B, E, i, etc.)
+    "id": "int64",  # Event ID
+    "name": "category",  # Event name (medium cardinality, use categorical)
+    "dur": "int64",  # Duration in microseconds (max 10 days = 864B μs, requires int64)
+    "arg_detail": "string",  # Detail string (high cardinality, keep as string)
+    "arg_count": "int64",  # Argument count
+    "arg_avg ms": "int64",  # Average milliseconds
+    "arg_name": "category",  # Argument name (medium cardinality, use categorical)
+}
+
+
+@dataclass
+class FileMetadata:
+    """
+    Compilation metadata for one trace, combining raw and derived values.
+
+    Raw values are taken from the trace; the remaining fields are convenience
+    conversions of them (seconds, epoch-based wall-clock times, formatted
+    timestamps). Every field except time_granularity defaults to None.
+
+    Attributes:
+        source_file: Main .cpp/.c file being compiled
+        time_granularity: Unit of the trace's time values (always "microseconds" for Clang)
+        beginning_of_time: Epoch timestamp in microseconds read from the JSON root
+        execute_compiler_ts: Timestamp of the ExecuteCompiler event (microseconds)
+        execute_compiler_dur: Duration of the ExecuteCompiler event (microseconds)
+        total_wall_time_us: Total compilation time in microseconds (equals execute_compiler_dur)
+        total_wall_time_s: Total compilation time in seconds (derived from microseconds)
+        wall_start_time: Wall clock start in microseconds since epoch (derived)
+        wall_end_time: Wall clock end in microseconds since epoch (derived)
+        wall_start_datetime: Formatted, human-readable start time (derived)
+        wall_end_datetime: Formatted, human-readable end time (derived)
+    """
+
+    source_file: Optional[str] = None
+    time_granularity: str = "microseconds"
+    beginning_of_time: Optional[int] = None
+    execute_compiler_ts: Optional[int] = None
+    execute_compiler_dur: Optional[int] = None
+    total_wall_time_us: Optional[int] = None
+    total_wall_time_s: Optional[float] = None
+    wall_start_time: Optional[int] = None
+    wall_end_time: Optional[int] = None
+    wall_start_datetime: Optional[str] = None
+    wall_end_datetime: Optional[str] = None
+
+    def __repr__(self):
+        # Multi-line layout: one indented "name = value" entry per instance attribute
+        entries = [f"  {key} = {val!r}" for key, val in vars(self).items()]
+        body = "\n".join(entries)
+        return f"{type(self).__name__}(\n{body}\n)"
+
+
+def parse_file(filepath: Union[str, Path]) -> pd.DataFrame:
+    """
+    Load a Clang -ftime-trace JSON file as a Pandas DataFrame.
+
+    A trace is a collection of events, each carrying fields such as name,
+    phase (ph), timestamp (ts), duration (dur), process/thread IDs and an
+    optional 'args' dictionary. Both a bare JSON array of events and an object
+    holding the events under "traceEvents" are accepted.
+
+    The resulting columns must match TRACE_EVENT_DTYPES exactly; each column is
+    then converted to the dtype listed there (missing integers become 0).
+
+    The root-level beginningOfTime value is saved in
+    df.attrs['beginningOfTime'] (None when the JSON root is an array). Call
+    get_metadata(df) for processed metadata built on top of it.
+
+    Args:
+        filepath: Path to the -ftime-trace JSON file
+
+    Returns:
+        DataFrame with one column per event field. Entries of the nested
+        'args' dictionary appear as columns with an 'arg_' prefix.
+
+    Raises:
+        FileNotFoundError: If the file doesn't exist
+        ValueError: If the JSON is invalid or empty, has an unexpected
+            top-level shape, or its columns differ from the expected schema
+
+    Examples:
+        >>> df = parse_file('build/trace.json')
+        >>> df[['name', 'dur']].head()
+        >>>
+        >>> # Processed metadata
+        >>> metadata = get_metadata(df)
+        >>> print(f"Compiled: {metadata.source_file}")
+        >>> print(f"Duration: {metadata.total_wall_time_s:.2f}s")
+        >>>
+        >>> # Raw beginningOfTime, if needed
+        >>> beginning = df.attrs.get('beginningOfTime')
+        >>> print(f"Beginning of time: {beginning}")
+    """
+    trace_path = Path(filepath)
+    if not trace_path.exists():
+        raise FileNotFoundError(f"Trace file not found: {trace_path}")
+
+    # orjson decodes directly from bytes, so read the file in binary mode
+    with open(trace_path, "rb") as handle:
+        payload = orjson.loads(handle.read())
+
+    if not payload:
+        raise ValueError(f"Empty trace data in file: {trace_path}")
+
+    # The events are either the root array or live under "traceEvents"
+    is_wrapped = isinstance(payload, dict)
+    if is_wrapped:
+        if "traceEvents" not in payload:
+            raise ValueError(
+                f"Expected 'traceEvents' key in JSON object, got keys: {list(payload.keys())}"
+            )
+        events = payload["traceEvents"]
+    elif isinstance(payload, list):
+        events = payload
+    else:
+        raise ValueError(f"Expected JSON array or object, got {type(payload).__name__}")
+
+    frame = pd.DataFrame(events)
+    if frame.empty:
+        raise ValueError(f"No trace events found in file: {trace_path}")
+
+    # Expand the nested 'args' dictionaries into 'arg_*' columns
+    if "args" in frame.columns:
+        frame = _flatten_args(frame)
+
+    # Strict schema check: the column set must equal the expected one
+    expected = set(TRACE_EVENT_DTYPES.keys())
+    present = set(frame.columns)
+
+    missing = expected - present
+    if missing:
+        raise ValueError(
+            f"Missing expected columns in trace data: {sorted(missing)}"
+        )
+
+    extra = present - expected
+    if extra:
+        raise ValueError(
+            f"Unexpected columns found in trace data: {sorted(extra)}"
+        )
+
+    # Convert every column to its memory-efficient target dtype
+    for column, kind in TRACE_EVENT_DTYPES.items():
+        if kind in ("int64", "int32"):
+            # Integer dtypes cannot hold missing values, so substitute 0 first
+            frame[column] = frame[column].fillna(0).astype(kind)
+        elif kind in ("category", "string"):
+            frame[column] = frame[column].astype(kind)
+        else:
+            raise ValueError(f"Unsupported dtype '{kind}' for column '{column}'")
+
+    # Only the object form of the JSON carries a root-level beginningOfTime
+    frame.attrs["beginningOfTime"] = (
+        payload.get("beginningOfTime") if is_wrapped else None
+    )
+
+    return frame
+
+
+def _flatten_args(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Flatten the 'args' column into separate columns with 'arg_' prefix.
+
+    The 'args' field in trace events contains additional metadata as a dictionary.
+    Each key found in any event's dictionary becomes a column named 'arg_<key>';
+    events lacking that key (or whose 'args' value is not a dictionary) get a
+    missing value in that column.
+
+    Args:
+        df: DataFrame with an 'args' column containing dictionaries
+
+    Returns:
+        DataFrame with flattened args columns and original 'args' column removed.
+        Row order and index are the same as in the input. An input with no rows
+        is returned unchanged.
+
+    Raises:
+        KeyError: If df has no 'args' column
+    """
+    # Walk the column directly instead of DataFrame.iterrows(): iterrows builds
+    # a Series object per row, which is needlessly slow for large traces.
+    args_data = [args if isinstance(args, dict) else {} for args in df["args"]]
+
+    if not args_data:
+        return df
+
+    # Reuse df's index so the column-wise concat below pairs each row with its
+    # own args even when df does not have a default 0..n-1 index.
+    args_df = pd.DataFrame(args_data, index=df.index)
+    # Prefix all args columns with 'arg_'
+    args_df.columns = [f"arg_{col}" for col in args_df.columns]
+
+    # Drop original args column and concatenate flattened args
+    return pd.concat([df.drop(columns=["args"]), args_df], axis=1)
+
+
+def _normalize_source_path(file_path: str) -> str:
+    """
+    Trim a source path so that it begins at its 'composable_kernel' directory.
+
+    When a path component equals 'composable_kernel', everything before the
+    last such component is dropped. Paths without that component are returned
+    exactly as given.
+
+    Args:
+        file_path: Full filesystem path to a source file
+
+    Returns:
+        Path starting at 'composable_kernel', or the unmodified input when no
+        such component exists
+
+    Examples:
+        >>> _normalize_source_path('/home/user/composable_kernel/include/ck/tensor.hpp')
+        'composable_kernel/include/ck/tensor.hpp'
+        >>> _normalize_source_path('/usr/include/vector')
+        '/usr/include/vector'
+    """
+    components = Path(file_path).parts
+
+    # Scan from the end so the deepest 'composable_kernel' component wins
+    for distance_from_end, component in enumerate(reversed(components)):
+        if component == "composable_kernel":
+            start = len(components) - 1 - distance_from_end
+            return str(Path(*components[start:]))
+
+    # No such component: hand back the caller's string untouched
+    return file_path
+
+
+def get_metadata(df: pd.DataFrame) -> FileMetadata:
+    """
+    Extract and process compilation metadata from a DataFrame.
+
+    This function processes events from the DataFrame to extract compilation
+    information, then computes derived fields like formatted timestamps and
+    converted time units. Fields whose inputs are unavailable are left as None.
+
+    Args:
+        df: DataFrame returned by parse_file() with beginningOfTime in its .attrs
+
+    Returns:
+        FileMetadata instance with both raw and computed fields:
+        - source_file: Main .cpp/.c file being compiled, taken from the first
+          ParseDeclarationOrFunctionDefinition event whose detail names a
+          .cpp/.cc/.cxx/.c file
+        - time_granularity: Time unit used in trace ("microseconds")
+        - beginning_of_time: Epoch timestamp in microseconds from JSON root
+        - execute_compiler_ts: Timestamp of the first ExecuteCompiler event
+        - execute_compiler_dur: Duration of the first ExecuteCompiler event
+        - total_wall_time_us: Total compilation time in microseconds
+        - total_wall_time_s: Total compilation time in seconds (computed)
+        - wall_start_time: beginning_of_time + execute_compiler_ts (computed)
+        - wall_end_time: wall_start_time + execute_compiler_dur (computed)
+        - wall_start_datetime: Human-readable start time in local time,
+          millisecond precision (formatted)
+        - wall_end_datetime: Human-readable end time in local time,
+          millisecond precision (formatted)
+
+    Examples:
+        >>> df = parse_file('trace.json')
+        >>> metadata = get_metadata(df)
+        >>> print(f"Compiled: {metadata.source_file}")
+        >>> print(f"Duration: {metadata.total_wall_time_s:.2f}s")
+        >>> print(f"Started: {metadata.wall_start_datetime}")
+    """
+    # Extract beginningOfTime from DataFrame attributes
+    beginning_of_time = None
+    if hasattr(df, "attrs"):
+        beginning_of_time = df.attrs.get("beginningOfTime")
+
+    # Initialize metadata with beginningOfTime from JSON structure
+    metadata = FileMetadata(beginning_of_time=beginning_of_time)
+
+    # Process events to extract ExecuteCompiler timing information
+    if "name" in df.columns:
+        execute_compiler = df[df["name"] == "ExecuteCompiler"]
+        if not execute_compiler.empty:
+            # Get the first ExecuteCompiler event
+            event = execute_compiler.iloc[0]
+            # Store plain Python ints rather than numpy scalars so the fields
+            # match their Optional[int] annotations; skip missing values so
+            # the field stays None instead of holding NaN.
+            if "ts" in event and pd.notna(event["ts"]):
+                metadata.execute_compiler_ts = int(event["ts"])
+            if "dur" in event and pd.notna(event["dur"]):
+                metadata.execute_compiler_dur = int(event["dur"])
+
+    # Process events to find the main source file being compiled
+    if "name" in df.columns and "arg_detail" in df.columns:
+        # Look for ParseDeclarationOrFunctionDefinition events with .cpp or .c files
+        source_extensions = (".cpp", ".cc", ".cxx", ".c")
+        parse_events = df[df["name"] == "ParseDeclarationOrFunctionDefinition"]
+
+        # Only the detail column is needed. Missing values are dropped up front
+        # because parse_file() stores this column with the "string" dtype, whose
+        # missing marker (pd.NA) raises TypeError when truth-tested.
+        for detail in parse_events["arg_detail"].dropna():
+            if not detail:
+                continue
+
+            # Extract file path (may include line:column info)
+            file_path = str(detail).split(":")[0]
+
+            # Check if it's a source file (not a header)
+            if file_path.endswith(source_extensions):
+                metadata.source_file = _normalize_source_path(file_path)
+                break
+
+    # Compute derived fields
+    if metadata.execute_compiler_dur is not None:
+        metadata.total_wall_time_us = metadata.execute_compiler_dur
+        metadata.total_wall_time_s = metadata.execute_compiler_dur / 1_000_000.0
+
+    # Calculate wall clock times if we have the necessary data
+    if (
+        metadata.beginning_of_time is not None
+        and metadata.execute_compiler_ts is not None
+        and metadata.execute_compiler_dur is not None
+    ):
+        metadata.wall_start_time = (
+            metadata.beginning_of_time + metadata.execute_compiler_ts
+        )
+        metadata.wall_end_time = (
+            metadata.wall_start_time + metadata.execute_compiler_dur
+        )
+
+        # Convert to human-readable datetime strings (local time, trimmed from
+        # microsecond to millisecond precision)
+        try:
+            start_dt = datetime.fromtimestamp(metadata.wall_start_time / 1_000_000.0)
+            end_dt = datetime.fromtimestamp(metadata.wall_end_time / 1_000_000.0)
+            metadata.wall_start_datetime = start_dt.strftime("%Y-%m-%d %H:%M:%S.%f")[
+                :-3
+            ]
+            metadata.wall_end_datetime = end_dt.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+        except (OSError, OverflowError, ValueError):
+            # Best effort: an out-of-range or otherwise invalid timestamp
+            # leaves the formatted fields as None rather than failing.
+            pass
+
+    return metadata
--- a/script/analyze_build/trace_analysis/phase_breakdown.py
+++ b/script/analyze_build/trace_analysis/phase_breakdown.py
@@ -0,0 +1,354 @@
+# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+# SPDX-License-Identifier: MIT
+
+"""
+Phase breakdown analysis for Clang -ftime-trace data.
+
+This module provides hierarchical breakdown of compilation phases using
+the pre-aggregated "Total" events from Clang's -ftime-trace output.
+
+The data is returned as a PhaseBreakdown object with rich display and
+analysis capabilities optimized for Jupyter notebooks.
+"""
+
+import pandas as pd
+from collections import namedtuple
+from typing import Optional
+
+
+# Lightweight record type meant for iterating over phases one at a time.
+Phase = namedtuple(
+    "Phase",
+    "name depth duration duration_ms percentage",
+)
+
+
+class PhaseBreakdown:
+    """
+    Wrapper for compilation phase breakdown with notebook-friendly API.
+
+    Wraps a DataFrame holding one row per phase (columns ``name``,
+    ``parent``, ``depth``, ``duration``) and provides console/HTML display,
+    a formatted table, iteration, and a plotly-ready export.
+
+    ``duration`` values are treated as microseconds: every millisecond
+    figure reported by this class is ``duration / 1000``.
+
+    Examples:
+        >>> breakdown = get_phase_breakdown(df)
+        >>>
+        >>> # Display in Jupyter
+        >>> breakdown
+        >>>
+        >>> # Get metrics
+        >>> print(f"Total: {breakdown.total_ms:.1f}ms")
+        >>>
+        >>> # Formatted table
+        >>> breakdown.to_dataframe()
+        >>>
+        >>> # Visualization
+        >>> import plotly.express as px
+        >>> data = breakdown.to_plotly()
+        >>> fig = px.sunburst(**data)
+        >>> fig.show()
+        >>>
+        >>> # Iteration
+        >>> for phase in breakdown:
+        ...     print(f"{phase.name}: {phase.duration_ms:.1f}ms")
+    """
+
+    def __init__(self, df: pd.DataFrame):
+        """
+        Initialize from phase breakdown DataFrame.
+
+        Args:
+            df: DataFrame with columns name, parent, depth, duration.
+                An empty DataFrame produces an empty breakdown whose total
+                time is 0.
+        """
+        if df.empty:
+            # Normalize to a frame that still carries the expected columns.
+            self._df = pd.DataFrame(columns=["name", "parent", "depth", "duration"])
+            self._total_time = 0
+        else:
+            self._df = df
+            self._total_time = self._get_total_time()
+
+    def __repr__(self) -> str:
+        """Simple text representation for console."""
+        if self._df.empty:
+            return "PhaseBreakdown(empty)"
+        n_phases = len(self._df)
+        # The stored total is in microseconds; convert so the "ms" label holds.
+        return f"PhaseBreakdown({n_phases} phases, {self.total_ms:.1f}ms total)"
+
+    def _repr_html_(self) -> str:
+        """Rich HTML representation for Jupyter notebooks."""
+        if self._df.empty:
+            return "<div><i>PhaseBreakdown(empty)</i></div>"
+        return self.to_dataframe()._repr_html_()
+
+    def __iter__(self):
+        """
+        Iterate over phases in DataFrame order.
+
+        Yields:
+            Phase records (name, depth, duration, duration_ms, percentage).
+            ``percentage`` is the share of the root duration, or 0.0 when
+            the total time is 0.
+        """
+        total = self._total_time
+        for name, depth, duration in zip(
+            self._df["name"], self._df["depth"], self._df["duration"]
+        ):
+            percentage = duration / total * 100 if total > 0 else 0.0
+            yield Phase(name, depth, duration, duration / 1000.0, percentage)
+
+    @property
+    def total_ms(self) -> float:
+        """Total time of the depth-0 phase in milliseconds (0.0 if absent)."""
+        return self._total_time / 1000.0
+
+    @property
+    def df(self) -> pd.DataFrame:
+        """
+        Access underlying DataFrame.
+
+        Returns:
+            DataFrame with columns name, parent, depth, duration
+        """
+        return self._df
+
+    def to_dataframe(self, show_percentages: bool = True) -> pd.DataFrame:
+        """
+        Format as DataFrame for display.
+
+        Creates a nicely formatted DataFrame suitable for Jupyter notebook display.
+
+        Args:
+            show_percentages: Include percentage of total time. The column is
+                omitted when the total time is 0, since no share can be
+                computed.
+
+        Returns:
+            DataFrame with columns Name, Parent, Depth, Duration (ms) and,
+            when available, % of Total. Empty if the breakdown is empty.
+        """
+        return self._format_dataframe(show_percentages)
+
+    def to_plotly(self) -> dict:
+        """
+        Convert to plotly hierarchical visualization format.
+
+        Returns a dictionary of keyword arguments that can be unpacked into
+        plotly.express sunburst, treemap, or icicle charts.
+
+        Returns:
+            Dictionary with keys: data_frame, names, parents, values,
+            branchvalues
+
+        Example:
+            >>> data = breakdown.to_plotly()
+            >>> import plotly.express as px
+            >>>
+            >>> # Create sunburst chart
+            >>> fig = px.sunburst(**data)
+            >>> fig.show()
+            >>>
+            >>> # Create treemap chart
+            >>> fig = px.treemap(**data)
+            >>> fig.show()
+            >>>
+            >>> # Create icicle chart
+            >>> fig = px.icicle(**data)
+            >>> fig.show()
+        """
+        return self._build_plotly_data()
+
+    # Internal helper methods
+
+    def _get_total_time(self) -> int:
+        """Get total time from the first depth-0 row (0 if there is none)."""
+        root = self._df[self._df["depth"] == 0]
+        if root.empty:
+            return 0
+        return int(root.iloc[0]["duration"])
+
+    def _format_dataframe(self, show_percentages: bool) -> pd.DataFrame:
+        """Format phase breakdown as DataFrame."""
+        if self._df.empty:
+            return pd.DataFrame()
+
+        # A percentage is only meaningful with a non-zero total; deciding this
+        # once keeps row building and the final rounding step in agreement.
+        with_pct = show_percentages and self._total_time > 0
+
+        frame = self._df
+        display_rows = []
+        for name, parent, depth, duration in zip(
+            frame["name"], frame["parent"], frame["depth"], frame["duration"]
+        ):
+            display_row = {
+                "Name": name,
+                "Parent": parent or "(root)",
+                "Depth": depth,
+                "Duration (ms)": duration / 1000.0,
+            }
+            if with_pct:
+                display_row["% of Total"] = duration / self._total_time * 100
+            display_rows.append(display_row)
+
+        display_df = pd.DataFrame(display_rows)
+
+        if with_pct:
+            display_df["% of Total"] = display_df["% of Total"].round(1)
+
+        return display_df
+
+    def _build_plotly_data(self) -> dict:
+        """Convert to plotly hierarchical visualization format."""
+        return {
+            "data_frame": self._df,
+            "names": "name",
+            "parents": "parent",
+            "values": "duration",
+            "branchvalues": "total",
+        }
+
+
+# Hierarchical phase specification.
+#
+# The trace contains over 100 "Total <name>" events, many of which overlap,
+# so only the phases listed here are reported. get_phase_breakdown() raises
+# a ValueError if the listed children of a node add up to more than the
+# node itself.
+#
+# The hierarchy is specified as a nested dictionary where:
+# - Keys are phase names (matching "Total <name>" events in the trace)
+# - Values are dictionaries of child phases (or empty dict {} for leaf nodes)
+# - An empty string "" key is skipped during traversal; it is a placeholder
+#   for the residual "Other" entry
+#
+# NOTE: get_phase_breakdown() appends a "<parent>_Other" residual row to any
+# node that has children and unaccounted time, whether or not a "" key is
+# present. No "" keys are used below.
+#
+# This structure supports arbitrary nesting depth.
+PHASE_HIERARCHY = {
+    "ExecuteCompiler": {
+        "Frontend": {
+            "InstantiateFunction": {},
+        },
+        "Backend": {
+            "Optimizer": {},
+            "CodeGenPasses": {},
+        },
+    }
+}
+
+
+def get_phase_breakdown(df: pd.DataFrame) -> PhaseBreakdown:
+    """
+    Get hierarchical breakdown of compilation phases.
+
+    Builds one row per phase listed in PHASE_HIERARCHY, taking each duration
+    from the matching pre-aggregated "Total <name>" event in the trace, and
+    wraps the rows in a PhaseBreakdown.
+
+    All durations are in microseconds.
+
+    The hierarchy is traversed recursively, so arbitrary nesting depth is
+    supported. For every node that has children, any time not covered by the
+    listed children is reported as an extra "<node>_Other" child row.
+
+    Args:
+        df: DataFrame from parse_file(); must contain "name" and "dur" columns
+
+    Returns:
+        PhaseBreakdown wrapping a DataFrame with columns name, parent, depth,
+        duration
+
+    Raises:
+        ValueError: If the "name" or "dur" column is missing, if a phase in
+                   the hierarchy has no Total event, or if the children of a
+                   phase add up to more than the phase itself
+
+    Examples:
+        >>> df = parse_file('trace.json')
+        >>> breakdown = get_phase_breakdown(df)
+        >>>
+        >>> # Display in Jupyter (automatic)
+        >>> breakdown
+        >>>
+        >>> # Formatted table and raw rows
+        >>> breakdown.to_dataframe()
+        >>> breakdown.df
+        >>>
+        >>> # Visualize
+        >>> import plotly.express as px
+        >>> data = breakdown.to_plotly()
+        >>> fig = px.sunburst(**data)
+        >>> fig.show()
+    """
+    if "name" not in df.columns or "dur" not in df.columns:
+        raise ValueError("DataFrame missing required 'name' or 'dur' columns")
+
+    # Index the Total events once so each lookup is a dict access instead of
+    # a fresh scan of the DataFrame. setdefault keeps the first occurrence of
+    # a phase, matching a "first matching row" lookup.
+    is_total = df["name"].str.startswith("Total ", na=False)
+    total_events = df.loc[is_total, ["name", "dur"]]
+    durations_by_phase: dict = {}
+    for phase, dur in zip(
+        total_events["name"].str.removeprefix("Total "), total_events["dur"]
+    ):
+        durations_by_phase.setdefault(phase, dur)
+
+    def get_duration(phase_name: str) -> Optional[int]:
+        """Get duration in microseconds from a Total event, or None if absent."""
+        if phase_name not in durations_by_phase:
+            return None
+        return int(durations_by_phase[phase_name])
+
+    def process_node(
+        node_name: str,
+        parent_name: str,
+        depth: int,
+        children_spec: dict,
+    ) -> list[dict]:
+        """
+        Recursively process a node and its children in the phase hierarchy.
+
+        Args:
+            node_name: Name of the current phase node
+            parent_name: Name of the parent phase (empty string for root)
+            depth: Current depth in the tree (0 for root)
+            children_spec: Dictionary of child phases to process
+
+        Returns:
+            List of row dictionaries for this node and all descendants,
+            with this node's own row first
+
+        Raises:
+            ValueError: If phase not found or children exceed parent duration
+        """
+        node_duration = get_duration(node_name)
+        if node_duration is None:
+            raise ValueError(f"No Total {node_name} event found in trace")
+
+        rows = [
+            {
+                "name": node_name,
+                "parent": parent_name,
+                "depth": depth,
+                "duration": node_duration,
+            }
+        ]
+
+        if not children_spec:
+            return rows
+
+        children_total = 0
+        for child_name, grandchildren_spec in children_spec.items():
+            if child_name == "":
+                # Placeholder for "Other"; the residual is computed below.
+                continue
+
+            child_rows = process_node(
+                child_name, node_name, depth + 1, grandchildren_spec
+            )
+            rows.extend(child_rows)
+
+            # child_rows[0] is the direct child itself; grandchildren are
+            # already included in its duration and must not be added again.
+            children_total += child_rows[0]["duration"]
+
+        other_duration = node_duration - children_total
+        if other_duration < 0:
+            raise ValueError(
+                f"{node_name} children total ({children_total}) "
+                f"exceeds parent total ({node_duration})"
+            )
+
+        if other_duration > 0:
+            # Residual row so that the children add up to the parent.
+            rows.append(
+                {
+                    "name": f"{node_name}_Other",
+                    "parent": node_name,
+                    "depth": depth + 1,
+                    "duration": other_duration,
+                }
+            )
+
+        return rows
+
+    # Start recursive traversal from root
+    root_name = "ExecuteCompiler"
+    if root_name not in PHASE_HIERARCHY:
+        raise ValueError(f"Root phase '{root_name}' not found in PHASE_HIERARCHY")
+
+    all_rows = process_node(
+        root_name,
+        "",  # Root has no parent
+        0,  # Root is at depth 0
+        PHASE_HIERARCHY[root_name],
+    )
+
+    return PhaseBreakdown(pd.DataFrame(all_rows))
--- a/script/analyze_build/trace_analysis/template_analysis.py
+++ b/script/analyze_build/trace_analysis/template_analysis.py
@@ -0,0 +1,80 @@
+# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+# SPDX-License-Identifier: MIT
+
+"""
+Template instantiation analysis for Clang -ftime-trace data.
+
+This module provides specialized functions for analyzing C++ template
+instantiation costs from Clang's -ftime-trace output.
+"""
+
+import pandas as pd
+from .template_parser import parse_template_detail
+
+
+def get_template_instantiation_events(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Filter to template instantiation events and parse arg_detail into structured columns.
+
+    Returns events for:
+    - InstantiateFunction: Function template instantiations
+    - InstantiateClass: Class template instantiations
+
+    Bookkeeping columns that carry no information for these events
+    (arg_avg ms, arg_count, arg_name, cat, id, ph, pid, tid) are dropped
+    when present; columns that are absent from the input are ignored.
+
+    The returned DataFrame includes parsed columns from arg_detail:
+    - namespace: Top-level namespace (e.g., 'std', 'ck')
+    - template_name: Template name without parameters
+    - full_qualified_name: Full namespace::template_name
+    - param_count: Number of template parameters
+    - is_ck_type: Boolean indicating if this is a CK library type
+    - is_nested: Boolean indicating if contains nested templates
+
+    The parsed columns are present even when no event matches, so callers
+    can select them without first checking for an empty result.
+
+    Args:
+        df: DataFrame from parse_file(); must contain "name" and "arg_detail"
+
+    Returns:
+        Filtered DataFrame (index reset) containing template instantiation
+        events with parsed columns
+
+    Example:
+        >>> df = parse_file('trace.json')
+        >>> templates = get_template_instantiation_events(df)
+        >>> templates.sort_values('dur', ascending=False).head(10)
+        >>> # Filter to CK types only
+        >>> ck_templates = templates[templates['is_ck_type']]
+        >>> # Group by template name
+        >>> templates.groupby('template_name')['dur'].sum()
+    """
+    unused_columns = [
+        "arg_avg ms",
+        "arg_count",
+        "arg_name",
+        "cat",
+        "id",
+        "ph",
+        "pid",
+        "tid",
+    ]
+    parsed_columns = [
+        "namespace",
+        "template_name",
+        "full_qualified_name",
+        "param_count",
+        "is_ck_type",
+        "is_nested",
+    ]
+
+    # Filter to template instantiation events
+    is_instantiation = df["name"].isin(["InstantiateClass", "InstantiateFunction"])
+    filtered_df = (
+        df[is_instantiation]
+        # errors="ignore": not every trace carries every bookkeeping column.
+        .drop(columns=unused_columns, errors="ignore")
+        .reset_index(drop=True)
+    )
+
+    # Parse arg_detail into one dict per event
+    parsed_data = filtered_df["arg_detail"].apply(parse_template_detail)
+
+    # Naming the columns explicitly keeps them in the result when there are
+    # no rows; sharing the index keeps the concat row-aligned.
+    parsed_df = pd.DataFrame(
+        parsed_data.tolist(),
+        columns=parsed_columns,
+        index=filtered_df.index,
+    )
+
+    return pd.concat([filtered_df, parsed_df], axis=1)
--- a/script/analyze_build/trace_analysis/template_parser.py
+++ b/script/analyze_build/trace_analysis/template_parser.py
@@ -0,0 +1,301 @@
+# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+# SPDX-License-Identifier: MIT
+
+"""
+Template detail string parser for C++ template instantiations.
+
+This module provides functions to parse the arg_detail strings from
+Clang's -ftime-trace output into structured components.
+"""
+
+import re
+from typing import Dict
+
+
+def parse_template_detail(detail_str: str) -> Dict[str, object]:
+    """
+    Parse a template detail string into structured components.
+
+    Args:
+        detail_str: The arg_detail string from -ftime-trace. Values that are
+            empty or not a string produce the default (empty) result.
+
+    Returns:
+        Dictionary with parsed fields:
+        - namespace: Top-level namespace (e.g., 'std', 'ck')
+        - template_name: Template name without parameters
+        - full_qualified_name: Full namespace::template_name
+        - param_count: Number of template parameters
+        - is_ck_type: Boolean indicating if this is a CK library type
+        - is_nested: Boolean indicating if contains nested templates
+
+    Example:
+        >>> parse_template_detail('std::basic_string<char>')
+        {
+            'namespace': 'std',
+            'template_name': 'basic_string',
+            'full_qualified_name': 'std::basic_string',
+            'param_count': 1,
+            'is_ck_type': False,
+            'is_nested': False
+        }
+    """
+    # Reject non-strings first so the emptiness test only ever sees a str.
+    if not isinstance(detail_str, str) or not detail_str:
+        return _empty_result()
+
+    # Remove surrounding quotes if present
+    detail_str = detail_str.strip('"')
+
+    return {
+        "namespace": extract_namespace(detail_str),
+        "template_name": extract_template_name(detail_str),
+        "full_qualified_name": extract_full_qualified_name(detail_str),
+        "param_count": count_template_params(detail_str),
+        "is_ck_type": is_ck_template(detail_str),
+        "is_nested": is_nested_template(detail_str),
+    }
+
+
+def extract_namespace(detail_str: str) -> str:
+    """
+    Extract the leading identifier from a template detail string.
+
+    For a qualified name this is the top-level namespace. For an unqualified
+    name (no ``::``) the leading identifier itself is returned, so a simple
+    type such as ``int`` yields ``'int'``.
+
+    Args:
+        detail_str: The template detail string
+
+    Returns:
+        The leading identifier, or empty string if the string is empty or
+        does not start with an identifier
+
+    Example:
+        >>> extract_namespace('std::basic_string<char>')
+        'std'
+        >>> extract_namespace('ck::tensor_operation::device::DeviceConv2d<...>')
+        'ck'
+        >>> extract_namespace('int')
+        'int'
+    """
+    if not detail_str:
+        return ""
+
+    # One pattern covers both cases: whether or not "::" follows, the value
+    # returned is the identifier at the start of the (unquoted) string.
+    match = re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", detail_str.strip('"'))
+    return match.group(0) if match else ""
+
+
+def extract_template_name(detail_str: str) -> str:
+    """
+    Extract the template name without namespace or parameters.
+
+    Only the text before the first ``<`` is considered, so qualified names
+    that appear inside the template arguments never leak into the result.
+
+    Args:
+        detail_str: The template detail string
+
+    Returns:
+        The last ``::``-separated identifier before the first ``<`` (or
+        before the end of the string when there is no ``<``), or empty
+        string if that text does not end in an identifier
+
+    Example:
+        >>> extract_template_name('std::basic_string<char>')
+        'basic_string'
+        >>> extract_template_name('ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3<...>')
+        'GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3'
+        >>> extract_template_name('tuple<std::vector<int>>')
+        'tuple'
+    """
+    if not detail_str:
+        return ""
+
+    # Drop the quotes, then keep only the part that names the template.
+    head = detail_str.strip('"').partition("<")[0]
+
+    # The name is the identifier at the very end of the head, preceded either
+    # by "::" (qualified) or by the start of the string (unqualified).
+    match = re.search(r"(?:^|::)([a-zA-Z_][a-zA-Z0-9_]*)\s*$", head)
+    return match.group(1) if match else ""
+
+
+def extract_full_qualified_name(detail_str: str) -> str:
+    """
+    Extract the qualified name that precedes the template parameters.
+
+    Args:
+        detail_str: The template detail string
+
+    Returns:
+        The run of identifier characters and colons at the start of the
+        string, provided it is followed (after optional whitespace) by ``<``
+        or the end of the string; otherwise empty string
+
+    Example:
+        >>> extract_full_qualified_name('std::basic_string<char>')
+        'std::basic_string'
+        >>> extract_full_qualified_name('ck::tensor_operation::device::DeviceConv2d<...>')
+        'ck::tensor_operation::device::DeviceConv2d'
+    """
+    if not detail_str:
+        return ""
+
+    unquoted = detail_str.strip('"')
+
+    # Anchored at the start: name characters, then "<" or end of string.
+    found = re.match(r"^([a-zA-Z_:][a-zA-Z0-9_:]*)\s*(?:<|$)", unquoted)
+    return found.group(1) if found else ""
+
+
+def count_template_params(detail_str: str) -> int:
+    """
+    Count the number of top-level template parameters.
+
+    Counts commas directly inside the first (outermost) pair of template
+    brackets. Commas inside nested templates, or inside parentheses,
+    square brackets or braces (e.g. the argument list of a function type),
+    do not separate top-level parameters and are not counted.
+
+    Args:
+        detail_str: The template detail string
+
+    Returns:
+        Number of template parameters, or 0 if the string is not a template
+        or its parameter list is empty
+
+    Example:
+        >>> count_template_params('std::basic_string<char>')
+        1
+        >>> count_template_params('std::tuple<int, float, double>')
+        3
+        >>> count_template_params('std::function<void(int, float)>')
+        1
+        >>> count_template_params('Foo<>')
+        0
+    """
+    if not detail_str:
+        return 0
+
+    text = detail_str.strip('"')
+    start = text.find("<")
+    if start == -1:
+        return 0
+
+    angle_depth = 0  # nesting of <...>
+    group_depth = 0  # nesting of (), [] and {}
+    commas = 0
+    has_content = False  # anything other than whitespace in the parameter list
+
+    for char in text[start:]:
+        if char in "([{":
+            group_depth += 1
+            has_content = True
+        elif char in ")]}":
+            # Clamp so a stray closer cannot push the depth negative.
+            group_depth = max(group_depth - 1, 0)
+        elif group_depth:
+            # Inside a bracketed group: "<", ">" and "," are not structural.
+            continue
+        elif char == "<":
+            angle_depth += 1
+            if angle_depth > 1:
+                has_content = True
+        elif char == ">":
+            angle_depth -= 1
+            if angle_depth == 0:
+                # Closed the outermost template; ignore anything after it.
+                break
+        elif char == ",":
+            if angle_depth == 1:
+                commas += 1
+        elif not char.isspace():
+            has_content = True
+
+    if not has_content and commas == 0:
+        # "Foo<>" (or only whitespace between the brackets): no parameters.
+        return 0
+    return commas + 1
+
+
+def is_ck_template(detail_str: str) -> bool:
+    """
+    Tell whether a template detail string refers to the CK library.
+
+    Args:
+        detail_str: The template detail string
+
+    Returns:
+        True if the string (ignoring surrounding double quotes) begins with
+        ``ck::`` or contains ``::ck::`` anywhere, False otherwise
+
+    Example:
+        >>> is_ck_template('ck::tensor_operation::device::DeviceConv2d<...>')
+        True
+        >>> is_ck_template('std::basic_string<char>')
+        False
+    """
+    if not detail_str:
+        return False
+
+    unquoted = detail_str.strip('"')
+
+    # Top-level ck:: namespace
+    if unquoted.startswith("ck::"):
+        return True
+
+    # ck:: reached through an enclosing or global qualifier
+    return "::ck::" in unquoted
+
+
+def is_nested_template(detail_str: str) -> bool:
+    """
+    Check if this template contains nested template instantiations.
+
+    The check is purely textual: it looks for a second ``<`` that opens
+    before the first template bracket is closed. Type aliases that hide a
+    template (such as ``std::string``) are therefore not detected.
+
+    Args:
+        detail_str: The template detail string
+
+    Returns:
+        True if contains nested templates, False otherwise
+
+    Example:
+        >>> is_nested_template('std::vector<int>')
+        False
+        >>> is_nested_template('std::vector<std::string>')
+        False
+        >>> is_nested_template('std::vector<std::vector<int>>')
+        True
+    """
+    if not detail_str:
+        return False
+
+    text = detail_str.strip('"')
+    start = text.find("<")
+    if start == -1:
+        return False
+
+    # After the opening "<", whichever bracket comes first decides the
+    # answer: another "<" is a nested template, a ">" closes the outermost
+    # template without any nesting.
+    for char in text[start + 1 :]:
+        if char == "<":
+            return True
+        if char == ">":
+            return False
+
+    return False
+
+
+def _empty_result() -> Dict[str, object]:
+    """
+    Return a fresh result dictionary with default values.
+
+    Used for detail strings that are empty or not strings. A new dict is
+    built on every call, so callers may mutate the result freely.
+    """
+    return {
+        "namespace": "",
+        "template_name": "",
+        "full_qualified_name": "",
+        "param_count": 0,
+        "is_ck_type": False,
+        "is_nested": False,
+    }