# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT

"""
Phase breakdown analysis for Clang -ftime-trace data.

This module provides hierarchical breakdown of compilation phases using the
pre-aggregated "Total" events from Clang's -ftime-trace output. The data is
returned as a PhaseBreakdown object with display helpers intended for Jupyter
notebooks.
"""

from collections import namedtuple
from typing import Iterator, Optional

import pandas as pd

# Lightweight record yielded when iterating over a PhaseBreakdown.
# `duration` is in microseconds, `duration_ms` in milliseconds and
# `percentage` is relative to the root (depth 0) phase.
Phase = namedtuple("Phase", ["name", "depth", "duration", "duration_ms", "percentage"])


class PhaseBreakdown:
    """
    Wrapper for compilation phase breakdown with notebook-friendly API.

    Holds a DataFrame with one row per phase (columns ``name``, ``parent``,
    ``depth``, ``duration``; durations in microseconds) and offers text/HTML
    representations, a display-oriented DataFrame, iteration and a plotly
    export.

    Examples:
        >>> breakdown = get_phase_breakdown(df)
        >>>
        >>> # Display in Jupyter
        >>> breakdown
        >>>
        >>> # Get metrics
        >>> print(f"Total: {breakdown.total_ms:.1f}ms")
        >>>
        >>> # Visualization
        >>> import plotly.express as px
        >>> data = breakdown.to_plotly()
        >>> fig = px.sunburst(**data)
        >>> fig.show()
        >>>
        >>> # Iteration
        >>> for phase in breakdown:
        >>>     print(f"{phase.name}: {phase.duration_ms:.1f}ms")
    """

    def __init__(self, df: pd.DataFrame):
        """
        Initialize from phase breakdown DataFrame.

        Args:
            df: DataFrame with columns name, parent, depth, duration
        """
        if df.empty:
            # Normalize any empty input to an empty frame with the expected
            # columns so later column accesses do not fail.
            self._df = pd.DataFrame(columns=["name", "parent", "depth", "duration"])
            self._total_time = 0
        else:
            self._df = df
            self._total_time = self._get_total_time()

    def __repr__(self) -> str:
        """Simple text representation for console."""
        if self._df.empty:
            return "PhaseBreakdown(empty)"
        n_phases = len(self._df)
        # _total_time is stored in microseconds; convert before labelling as ms.
        return f"PhaseBreakdown({n_phases} phases, {self.total_ms:.1f}ms total)"

    def _repr_html_(self) -> str:
        """Rich HTML representation for Jupyter notebooks."""
        if self._df.empty:
            return "<p>PhaseBreakdown(empty)</p>"
        return self.to_dataframe()._repr_html_()

    def __iter__(self) -> Iterator[Phase]:
        """
        Iterate over the phases in row order.

        Yields:
            Phase tuples (name, depth, duration, duration_ms, percentage).
            ``percentage`` is 0.0 when the total time is 0.
        """
        total = self._total_time
        for _, row in self._df.iterrows():
            duration = int(row["duration"])
            percentage = duration / total * 100 if total > 0 else 0.0
            yield Phase(
                row["name"],
                int(row["depth"]),
                duration,
                duration / 1000.0,
                percentage,
            )

    @property
    def df(self) -> pd.DataFrame:
        """
        Access underlying DataFrame.

        Returns:
            DataFrame with columns name, parent, depth, duration
        """
        return self._df

    @property
    def total_ms(self) -> float:
        """
        Total time of the root (depth 0) phase in milliseconds.

        Returns:
            The root duration divided by 1000, or 0.0 when there is no root.
        """
        return self._total_time / 1000.0

    def to_dataframe(self, show_percentages: bool = True) -> pd.DataFrame:
        """
        Format as DataFrame for display.

        Args:
            show_percentages: Include percentage of total time. The column is
                omitted when the total time is 0, because no percentage can
                be computed.

        Returns:
            DataFrame with columns Name, Parent, Depth, Duration (ms) and,
            optionally, % of Total
        """
        return self._format_dataframe(show_percentages)

    def to_plotly(self) -> dict:
        """
        Convert to plotly hierarchical visualization format.

        Returns a dictionary of keyword arguments that can be passed to
        plotly.express sunburst, treemap, or icicle charts.

        Returns:
            Dictionary with keys: data_frame, names, parents, values,
            branchvalues

        Example:
            >>> data = breakdown.to_plotly()
            >>> import plotly.express as px
            >>>
            >>> # Create sunburst chart
            >>> fig = px.sunburst(**data)
            >>> fig.show()
            >>>
            >>> # Create treemap chart
            >>> fig = px.treemap(**data)
            >>> fig.show()
            >>>
            >>> # Create icicle chart
            >>> fig = px.icicle(**data)
            >>> fig.show()
        """
        return self._build_plotly_data()

    # Internal helper methods

    def _get_total_time(self) -> int:
        """Get total time (microseconds) from the first depth-0 row, or 0."""
        root = self._df[self._df["depth"] == 0]
        if root.empty:
            return 0
        return int(root.iloc[0]["duration"])

    def _format_dataframe(self, show_percentages: bool) -> pd.DataFrame:
        """Format phase breakdown as a display DataFrame."""
        if self._df.empty:
            return pd.DataFrame()

        # Percentages are only meaningful with a positive total. The same
        # flag guards both the per-row value and the final rounding so the
        # column is never accessed when it was not created.
        include_pct = show_percentages and self._total_time > 0

        display_rows = []
        for _, row in self._df.iterrows():
            display_row = {
                "Name": row["name"],
                "Parent": row["parent"] if row["parent"] else "(root)",
                "Depth": row["depth"],
                "Duration (ms)": row["duration"] / 1000.0,
            }
            if include_pct:
                display_row["% of Total"] = row["duration"] / self._total_time * 100
            display_rows.append(display_row)

        display_df = pd.DataFrame(display_rows)
        if include_pct:
            display_df["% of Total"] = display_df["% of Total"].round(1)
        return display_df

    def _build_plotly_data(self) -> dict:
        """Build the keyword arguments for plotly hierarchical charts."""
        return {
            "data_frame": self._df,
            "names": "name",
            "parents": "parent",
            "values": "duration",
            "branchvalues": "total",
        }


# Hierarchical phase specification
# There are over 100 totals in the JSON file, but a lot of them overlap.
# If the children total more than their parent, we will throw a ValueError.
#
# The hierarchy is specified as a nested dictionary where:
# - Keys are phase names (matching "Total <name>" events in the trace)
# - Values are dictionaries of child phases (or empty dict {} for leaf nodes)
# - An empty string "" key is accepted and skipped; the "<parent>_Other"
#   residual is always computed for every node that has children
#
# This structure supports arbitrary nesting depth.
PHASE_HIERARCHY = {
    "ExecuteCompiler": {
        "Frontend": {
            "InstantiateFunction": {},
        },
        "Backend": {
            "Optimizer": {},
            "CodeGenPasses": {},
        },
    }
}


def get_phase_breakdown(df: pd.DataFrame) -> PhaseBreakdown:
    """
    Get hierarchical breakdown of compilation phases.

    Uses the pre-aggregated "Total" events from Clang's -ftime-trace output.
    All durations are in microseconds.

    The hierarchy is defined by the PHASE_HIERARCHY constant and supports
    arbitrary nesting depth. The tree is traversed recursively; for every
    node with children, any time not covered by the listed children is
    reported as an extra "<node>_Other" child.

    Args:
        df: DataFrame from parse_file(); must contain 'name' and 'dur' columns

    Returns:
        PhaseBreakdown object wrapping the resulting rows

    Raises:
        ValueError: If the required columns or Total events are missing, if a
            Total event has no duration, or if the children of a phase add up
            to more than the phase itself (indicating data inconsistency)

    Examples:
        >>> df = parse_file('trace.json')
        >>> breakdown = get_phase_breakdown(df)
        >>>
        >>> # Display in Jupyter (automatic)
        >>> breakdown
        >>>
        >>> # Get total compilation time
        >>> print(f"Total: {breakdown.total_ms:.1f}ms")
        >>>
        >>> # Visualize
        >>> import plotly.express as px
        >>> data = breakdown.to_plotly()
        >>> fig = px.sunburst(**data)
        >>> fig.show()
    """
    if "name" not in df.columns or "dur" not in df.columns:
        raise ValueError("DataFrame missing required 'name' or 'dur' columns")

    # Build a phase -> duration lookup once instead of filtering the frame on
    # every request. When a Total event occurs more than once, the first
    # occurrence wins.
    prefix = "Total "
    is_total = df["name"].str.startswith(prefix, na=False)
    total_events = df.loc[is_total, ["name", "dur"]].drop_duplicates(
        subset="name", keep="first"
    )
    durations = dict(
        zip(total_events["name"].str[len(prefix):], total_events["dur"])
    )

    def get_duration(phase_name: str) -> Optional[int]:
        """Get duration in microseconds from a Total event, or None if absent."""
        if phase_name not in durations:
            return None
        raw = durations[phase_name]
        if pd.isna(raw):
            raise ValueError(f"Total {phase_name} event has no duration")
        return int(raw)

    def process_node(
        node_name: str,
        parent_name: str,
        depth: int,
        children_spec: dict,
    ) -> list[dict]:
        """
        Recursively process a node and its children in the phase hierarchy.

        Args:
            node_name: Name of the current phase node
            parent_name: Name of the parent phase (empty string for root)
            depth: Current depth in the tree (0 for root)
            children_spec: Dictionary of child phases to process

        Returns:
            List of row dictionaries for this node and all descendants

        Raises:
            ValueError: If phase not found or children exceed parent duration
        """
        node_duration = get_duration(node_name)
        if node_duration is None:
            raise ValueError(f"No Total {node_name} event found in trace")

        rows = [
            {
                "name": node_name,
                "parent": parent_name,
                "depth": depth,
                "duration": node_duration,
            }
        ]

        if not children_spec:
            return rows

        children_total = 0
        for child_name, grandchildren_spec in children_spec.items():
            if child_name == "":
                # Placeholder key; the residual is computed below.
                continue

            child_rows = process_node(
                child_name, node_name, depth + 1, grandchildren_spec
            )
            rows.extend(child_rows)

            # The first row is the child itself; only direct children count
            # towards the parent's total (grandchildren are already included
            # in the child's own duration).
            children_total += child_rows[0]["duration"]

        other_duration = node_duration - children_total
        if other_duration < 0:
            raise ValueError(
                f"{node_name} children total ({children_total}) "
                f"exceeds parent total ({node_duration})"
            )

        if other_duration > 0:
            rows.append(
                {
                    "name": f"{node_name}_Other",
                    "parent": node_name,
                    "depth": depth + 1,
                    "duration": other_duration,
                }
            )

        return rows

    # Start recursive traversal from root
    root_name = "ExecuteCompiler"
    if root_name not in PHASE_HIERARCHY:
        raise ValueError(f"Root phase '{root_name}' not found in PHASE_HIERARCHY")

    all_rows = process_node(
        root_name,
        "",  # Root has no parent
        0,  # Root is at depth 0
        PHASE_HIERARCHY[root_name],
    )

    return PhaseBreakdown(pd.DataFrame(all_rows))