Files
composable_kernel/script/analyze_build/trace_analysis/parser.py
John Shumway 0caf06e6f1 Add build trace analysis tools for -ftime-trace data
Introduces a new Python toolset in script/analyze_build/ for analyzing
Clang -ftime-trace JSON output to identify compilation bottlenecks and
optimize C++ metaprogramming build times.

Key features:
- Fast parallel processing of trace JSON files using all CPU cores (> 100 files/sec)
- Simple, cache-free architecture for consistent performance
- Comprehensive analysis of template instantiations and event types
- Command-line tools and Jupyter notebook support
- Automatic orjson detection for JSON parsing speedup

Components:
- trace_analysis/: Core library (models, parser, transformer)
- examples/: CLI tools for single-file and directory analysis
- notebooks/: Comprehensive Jupyter notebook with analysis patterns
- Detailed README with usage examples and performance data

Also adds ruff configuration to pyproject.toml to ignore E402 (module
level import not at top of file) for Jupyter notebooks, which commonly
have imports after markdown cells.

This toolset addresses the critical problem of long build times in CK's
C++17 metaprogramming codebase by treating -ftime-trace as a big data
problem, using pandas and modern analysis tools to understand compilation
patterns and measure improvement opportunities.
2026-01-03 18:28:22 -05:00

144 lines
3.6 KiB
Python

# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
"""
JSON parser for trace files.
Loads each trace file in a single read and exposes its events as a list or an iterator.
"""
from typing import Any, Dict, Iterator, List

try:
    import orjson

    HAS_ORJSON = True
except ImportError:
    import json

    HAS_ORJSON = False

from .models import TraceFile


class TraceParser:
    """
    Parser for Clang -ftime-trace JSON files.

    Each file is read and decoded in one pass. ``orjson`` is used when the
    module-level ``HAS_ORJSON`` flag is set, otherwise the standard ``json``
    module. Every method is a ``staticmethod``, so the class acts as a
    namespace and does not need to be instantiated.
    """

    # Template-related event names
    TEMPLATE_EVENT_NAMES = {
        "InstantiateClass",
        "InstantiateFunction",
        "InstantiateVariable",
        "ParseTemplate",
    }

    @staticmethod
    def parse(trace_file: "TraceFile") -> List[Dict[str, Any]]:
        """
        Parse a trace file and return all events.

        Args:
            trace_file: TraceFile to parse; only its ``path`` attribute is
                read here.

        Returns:
            List of event dictionaries. A top-level JSON object yields its
            ``"traceEvents"`` entry (an empty list if missing or null), a
            top-level JSON array is returned as-is, and any other top-level
            value yields an empty list.

        Raises:
            Errors from ``open`` and from the active JSON decoder are not
            caught and propagate to the caller.

        Note:
            Uses orjson if available (1.65x faster than stdlib json),
            otherwise falls back to standard json library. The -ftime-trace
            files are single-line JSON, so the whole file is read at once.
        """
        # Read raw bytes for both decoders: orjson takes bytes directly, and
        # json.loads detects the Unicode encoding of bytes input itself, so
        # the result does not depend on the platform's default text encoding.
        with open(trace_file.path, "rb") as f:
            raw = f.read()

        data = orjson.loads(raw) if HAS_ORJSON else json.loads(raw)

        # Handle both dict format {"traceEvents": [...]} and direct list format
        if isinstance(data, dict):
            # "or []" keeps the declared list return type when the key is
            # present but null.
            return data.get("traceEvents") or []
        if isinstance(data, list):
            return data
        return []

    @staticmethod
    def parse_stream(trace_file: "TraceFile") -> Iterator[Dict[str, Any]]:
        """
        Iterate over the events of a trace file.

        Args:
            trace_file: TraceFile to parse

        Yields:
            Individual event dictionaries

        Note:
            Kept for compatibility: the file is fully parsed by ``parse`` on
            the first iteration and the resulting events are then yielded
            one by one. Nothing is read until iteration starts.
        """
        yield from TraceParser.parse(trace_file)

    @staticmethod
    def is_template_event(event: Dict[str, Any]) -> bool:
        """
        Check if an event is template-related.

        Args:
            event: Event dictionary

        Returns:
            True if the event's ``"name"`` is one of ``TEMPLATE_EVENT_NAMES``
        """
        return event.get("name") in TraceParser.TEMPLATE_EVENT_NAMES

    @staticmethod
    def extract_template_detail(event: Dict[str, Any]) -> str:
        """
        Extract template detail from an event.

        Args:
            event: Event dictionary

        Returns:
            The ``"detail"`` entry of the event's ``"args"`` mapping, or an
            empty string if ``"args"`` is missing, null, not a mapping, or
            has no ``"detail"`` key
        """
        args = event.get("args")
        # A present-but-null (or otherwise non-dict) "args" must not raise.
        if not isinstance(args, dict):
            return ""
        return args.get("detail", "")

    @staticmethod
    def get_event_duration(event: Dict[str, Any]) -> int:
        """
        Get the duration of an event in microseconds.

        Args:
            event: Event dictionary

        Returns:
            Value of the ``"dur"`` key (0 if not available)
        """
        return event.get("dur", 0)

    @staticmethod
    def get_event_name(event: Dict[str, Any]) -> str:
        """
        Get the name of an event.

        Args:
            event: Event dictionary

        Returns:
            Value of the ``"name"`` key (or "Unknown" if not available)
        """
        return event.get("name", "Unknown")