diff --git a/.claude/skills/ck-build-analysis b/.claude/skills/ck-build-analysis index 1543705a51..7113001772 100755 --- a/.claude/skills/ck-build-analysis +++ b/.claude/skills/ck-build-analysis @@ -183,9 +183,10 @@ import re import sys from collections import defaultdict from datetime import datetime +from jinja2 import Environment, FileSystemLoader -if len(sys.argv) < 4: +if len(sys.argv) < 7:  # sys.argv[1]..sys.argv[6] are all read below - print("Usage: analyze.py ") + print("Usage: analyze.py TRACE_FILE OUTPUT_FILE TARGET GRANULARITY BUILD_TIME TEMPLATE_DIR") sys.exit(1) trace_file = sys.argv[1] @@ -193,6 +194,7 @@ output_file = sys.argv[2] target = sys.argv[3] granularity = sys.argv[4] build_time = sys.argv[5] +template_dir = sys.argv[6] print(f'Loading trace file: {trace_file}') with open(trace_file, 'r') as f: @@ -223,8 +225,7 @@ for event in data.get('traceEvents', []): template_stats[template_name]['count'] += 1 template_stats[template_name]['total_dur'] += dur -print('Sorting and generating report...') -sorted_templates = sorted(template_stats.items(), key=lambda x: x[1]['total_dur'], reverse=True) +print('Sorting data...') sorted_phases = sorted(phase_stats.items(), key=lambda x: x[1], reverse=True) top_individual.sort(key=lambda x: x['dur'], reverse=True) @@ -233,126 +234,101 @@ total_trace_time = sum(phase_stats.values()) total_events = len(data.get('traceEvents', [])) total_inst = sum(s['count'] for s in template_stats.values()) -report = [] -report.append('# Composable Kernel Build Time Analysis Report') -report.append('') -report.append(f'**Generated:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}') -report.append(f'**Target:** {target}') -report.append(f'**Granularity:** {granularity}µs') -report.append('') -report.append('## Executive Summary') -report.append('') -report.append(f'- **Wall Clock Time:** {build_time} seconds') -report.append(f'- **Trace Time:** {total_trace_time/1000:.1f} seconds') -report.append(f'- **Template Instantiation Time:** {total_template_time/1000:.1f} seconds ({100*total_template_time/total_trace_time:.1f}% of trace)') -report.append(f'- **Total 
Events Captured:** {total_events:,}') -report.append(f'- **Total Template Instantiations:** {total_inst:,}') -report.append(f'- **Unique Template Families:** {len(sorted_templates)}') -report.append('') -report.append('## Compilation Phase Breakdown') -report.append('') -report.append('| Phase | Time (ms) | Time (s) | % of Total |') -report.append('|-------|-----------|----------|------------|') -for phase, dur in sorted_phases[:20]: - pct = 100 * dur / total_trace_time - report.append(f'| {phase:<40} | {dur:>9.2f} | {dur/1000:>8.2f} | {pct:>9.1f}% |') -report.append('') -report.append('## Top 30 Most Expensive Individual Instantiations') -report.append('') -report.append('| Rank | Template | Type | Time (ms) |') -report.append('|------|----------|------|-----------|') -for i, inst in enumerate(top_individual[:30], 1): - detail = inst['detail'][:70] + '...' if len(inst['detail']) > 70 else inst['detail'] - inst_type = 'Func' if inst['type'] == 'InstantiateFunction' else 'Class' - report.append(f'| {i:>4} | {detail:<70} | {inst_type:<5} | {inst["dur"]:>9.2f} |') -report.append('') -report.append('## Template Families by Total Time (Top 50)') -report.append('') -report.append('| Rank | Template Family | Count | Total (ms) | Avg (ms) | % of Total |') -report.append('|------|-----------------|-------|------------|----------|------------|') -for i, (name, stats) in enumerate(sorted_templates[:50], 1): - count = stats['count'] - total = stats['total_dur'] - avg = total / count if count > 0 else 0 - pct = 100 * total / total_template_time if total_template_time > 0 else 0 - display_name = name[:40] + '...' 
if len(name) > 40 else name - report.append(f'| {i:>4} | {display_name:<43} | {count:>5} | {total:>10.2f} | {avg:>8.2f} | {pct:>9.1f}% |') -report.append('') -report.append('## Template Families by Instantiation Count (Top 50)') -report.append('') -sorted_by_count = sorted(template_stats.items(), key=lambda x: x[1]['count'], reverse=True) -report.append('| Rank | Template Family | Count | Total (ms) | Avg (ms) |') -report.append('|------|-----------------|-------|------------|----------|') -for i, (name, stats) in enumerate(sorted_by_count[:50], 1): - count = stats['count'] - total = stats['total_dur'] - avg = total / count if count > 0 else 0 - display_name = name[:40] + '...' if len(name) > 40 else name - report.append(f'| {i:>4} | {display_name:<43} | {count:>5} | {total:>10.2f} | {avg:>8.2f} |') -report.append('') -report.append('## Key Insights') -report.append('') -report.append('### 1. Template Instantiation Impact') -report.append(f'- Template instantiation accounts for {100*total_template_time/total_trace_time:.1f}% of total trace time') -if len(sorted_templates) >= 10: - top10_pct = 100*sum(s[1]["total_dur"] for s in sorted_templates[:10])/total_template_time - report.append(f'- Top 10 template families account for {top10_pct:.1f}% of instantiation time') -report.append('') -report.append('### 2. Most Expensive Templates') -if len(sorted_templates) > 0: - report.append(f'- **{sorted_templates[0][0]}**: {sorted_templates[0][1]["count"]:,} instantiations, {sorted_templates[0][1]["total_dur"]/1000:.2f}s total') -if len(sorted_templates) > 1: - avg = sorted_templates[1][1]["total_dur"] / sorted_templates[1][1]["count"] - report.append(f'- **{sorted_templates[1][0]}**: {sorted_templates[1][1]["count"]:,} instantiations, {avg:.2f}ms average') -report.append('') -report.append('## Optimization Recommendations') -report.append('') -report.append('### Short Term') -report.append('1. 
**Focus on High-Impact Templates**: Address top 10 families first') -report.append('2. **Explicit Template Instantiation**: Pre-instantiate common configurations') -report.append('3. **Extern Templates**: Mark frequently-used templates as extern in headers') -report.append('') -report.append('### Medium Term') -report.append('1. **Precompiled Headers**: Include heavy templates in PCH') -report.append('2. **Template Specialization**: Replace general templates with specialized versions') -report.append('3. **Template Depth Reduction**: Simplify template hierarchies') -report.append('') -report.append('### Long Term') -report.append('1. **Architectural Review**: Evaluate necessity of deep template metaprogramming') -report.append('2. **C++20 Concepts**: Earlier constraint checking, fewer instantiations') -report.append('3. **Build Caching**: Distributed build cache for template instantiations') -report.append('') -report.append('## Detailed Statistics') -report.append('') -report.append(f'- **Total Unique Templates:** {len(sorted_templates)}') -report.append(f'- **Total Instantiations:** {total_inst:,}') -if total_inst > 0: - report.append(f'- **Average Instantiation Time:** {total_template_time/total_inst:.3f}ms') +# Prepare templates by time with calculated fields +templates_by_time = [] +for name, stats in sorted(template_stats.items(), key=lambda x: x[1]['total_dur'], reverse=True): + templates_by_time.append((name, { + 'count': stats['count'], + 'total_dur': stats['total_dur'], + 'avg': stats['total_dur'] / stats['count'] if stats['count'] > 0 else 0, + 'pct': 100 * stats['total_dur'] / total_template_time if total_template_time > 0 else 0 + })) + +# Prepare templates by count +templates_by_count = [] +for name, stats in sorted(template_stats.items(), key=lambda x: x[1]['count'], reverse=True): + templates_by_count.append((name, { + 'count': stats['count'], + 'total_dur': stats['total_dur'], + 'avg': stats['total_dur'] / stats['count'] if stats['count'] > 0 else 
0 + })) + +# Prepare top individual instantiations with friendly type names +for inst in top_individual: + inst['inst_type'] = 'Func' if inst['type'] == 'InstantiateFunction' else 'Class' + +# Calculate additional metrics +median_count = 0 if len(template_stats) > 0: median_count = sorted([s["count"] for s in template_stats.values()])[len(template_stats)//2] - report.append(f'- **Median Template Family Count:** {median_count}') -report.append('') -report.append('---') -report.append('') -report.append(f'*Report generated using Clang -ftime-trace with {granularity}µs granularity*') -report.append(f'*Analysis tool: ck-build-analysis*') + +top10_pct = 0 +if len(templates_by_time) >= 10 and total_template_time > 0:  # same zero-total guard as the per-family 'pct' field + top10_pct = 100 * sum(s[1]["total_dur"] for s in templates_by_time[:10]) / total_template_time + +print('Rendering report with Jinja2...') +# Set up Jinja2 environment with custom filters +env = Environment(loader=FileSystemLoader(template_dir)) + +def format_number(value): + """Format number with thousand separators""" + return f'{value:,}' + +def truncate(value, length): + """Truncate string to length with ellipsis""" + if len(value) > length: + return value[:length-3] + '...' 
+ return value + +def pad(value, length): + """Pad string to specified length""" + return f'{value:<{length}}' + +env.filters['format_number'] = format_number +env.filters['truncate'] = truncate +env.filters['pad'] = pad + +# Load and render template +template = env.get_template('build_analysis_report.md.jinja') +report_content = template.render( + timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + target=target, + granularity=granularity, + build_time=build_time, + trace_time_sec=f'{total_trace_time/1000:.1f}', + template_time_sec=f'{total_template_time/1000:.1f}', + template_pct=f'{100*total_template_time/total_trace_time:.1f}' if total_trace_time > 0 else '0.0',  # empty trace: avoid dividing by zero + total_events=total_events, + total_instantiations=total_inst, + unique_families=len(template_stats), + total_trace_time=total_trace_time, + total_template_time=total_template_time, + phases=sorted_phases, + top_individual=top_individual, + templates_by_time=templates_by_time, + templates_by_count=templates_by_count, + median_count=median_count, + top10_pct=f'{top10_pct:.1f}' +) with open(output_file, 'w') as f: - f.write('\n'.join(report)) + f.write(report_content) print(f'Report generated: {output_file}') -print(f'Total lines: {len(report)}') +print(f'Report size: {len(report_content)} characters') PYSCRIPT -# Copy analysis script to container and run it +# Copy analysis script and templates to container docker cp "${ANALYSIS_SCRIPT}" "${CONTAINER_NAME}:/tmp/analyze.py" +docker cp "${SCRIPT_DIR}/templates" "${CONTAINER_NAME}:/tmp/ck_build_analysis_templates" docker exec "${CONTAINER_NAME}" python3 /tmp/analyze.py \ "${TRACE_FILE}" \ "/workspace/${OUTPUT_FILE}" \ "${TARGET}" \ "${GRANULARITY}" \ - "${BUILD_TIME}" + "${BUILD_TIME}" \ + "/tmp/ck_build_analysis_templates" # Copy report back to host docker cp "${CONTAINER_NAME}:/workspace/${OUTPUT_FILE}" "${PROJECT_ROOT}/${OUTPUT_FILE}" @@ -360,6 +336,7 @@ docker cp "${CONTAINER_NAME}:/workspace/${OUTPUT_FILE}" "${PROJECT_ROOT}/${OUTPU # Cleanup rm -f "${ANALYSIS_SCRIPT}" docker exec 
"${CONTAINER_NAME}" rm -f /tmp/analyze.py +docker exec "${CONTAINER_NAME}" rm -rf /tmp/ck_build_analysis_templates echo "" echo "═══════════════════════════════════════════════════════════════" diff --git a/.claude/skills/templates/build_analysis_report.md.jinja b/.claude/skills/templates/build_analysis_report.md.jinja new file mode 100644 index 0000000000..b6c4b2bbf5 --- /dev/null +++ b/.claude/skills/templates/build_analysis_report.md.jinja @@ -0,0 +1,95 @@ +# Composable Kernel Build Time Analysis Report + +**Generated:** {{ timestamp }} +**Target:** {{ target }} +**Granularity:** {{ granularity }}µs + +## Executive Summary + +- **Wall Clock Time:** {{ build_time }} seconds +- **Trace Time:** {{ trace_time_sec }} seconds +- **Template Instantiation Time:** {{ template_time_sec }} seconds ({{ template_pct }}% of trace) +- **Total Events Captured:** {{ total_events|format_number }} +- **Total Template Instantiations:** {{ total_instantiations|format_number }} +- **Unique Template Families:** {{ unique_families }} + +## Compilation Phase Breakdown + +| Phase | Time (ms) | Time (s) | % of Total | +|-------|-----------|----------|------------| +{% for phase, dur in phases[:20] -%} +| {{ phase|pad(40) }} | {{ "%9.2f"|format(dur) }} | {{ "%8.2f"|format(dur/1000) }} | {{ "%9.1f"|format(100 * dur / total_trace_time) }}% | +{% endfor %} + +## Top 30 Most Expensive Individual Instantiations + +| Rank | Template | Type | Time (ms) | +|------|----------|------|-----------| +{% for inst in top_individual[:30] -%} +| {{ "%4d"|format(loop.index) }} | {{ inst.detail|truncate(70) }} | {{ inst.inst_type|pad(5) }} | {{ "%9.2f"|format(inst.dur) }} | +{% endfor %} + +## Template Families by Total Time (Top 50) + +| Rank | Template Family | Count | Total (ms) | Avg (ms) | % of Total | +|------|-----------------|-------|------------|----------|------------| +{% for name, stats in templates_by_time[:50] -%} +| {{ "%4d"|format(loop.index) }} | {{ name|truncate(43)|pad(43) }} | {{ 
"%5d"|format(stats.count) }} | {{ "%10.2f"|format(stats.total_dur) }} | {{ "%8.2f"|format(stats.avg) }} | {{ "%9.1f"|format(stats.pct) }}% | +{% endfor %} + +## Template Families by Instantiation Count (Top 50) + +| Rank | Template Family | Count | Total (ms) | Avg (ms) | +|------|-----------------|-------|------------|----------| +{% for name, stats in templates_by_count[:50] -%} +| {{ "%4d"|format(loop.index) }} | {{ name|truncate(43)|pad(43) }} | {{ "%5d"|format(stats.count) }} | {{ "%10.2f"|format(stats.total_dur) }} | {{ "%8.2f"|format(stats.avg) }} | +{% endfor %} + +## Key Insights + +### 1. Template Instantiation Impact +- Template instantiation accounts for {{ template_pct }}% of total trace time +{% if unique_families >= 10 -%} +- Top 10 template families account for {{ top10_pct }}% of instantiation time +{% endif %} + +### 2. Most Expensive Templates +{% if templates_by_time|length > 0 -%} +- **{{ templates_by_time[0][0] }}**: {{ templates_by_time[0][1].count|format_number }} instantiations, {{ "%.2f"|format(templates_by_time[0][1].total_dur/1000) }}s total +{% endif -%} +{% if templates_by_time|length > 1 -%} +- **{{ templates_by_time[1][0] }}**: {{ templates_by_time[1][1].count|format_number }} instantiations, {{ "%.2f"|format(templates_by_time[1][1].avg) }}ms average +{% endif %} + +## Optimization Recommendations + +### Short Term +1. **Focus on High-Impact Templates**: Address top 10 families first +2. **Explicit Template Instantiation**: Pre-instantiate common configurations +3. **Extern Templates**: Mark frequently-used templates as extern in headers + +### Medium Term +1. **Precompiled Headers**: Include heavy templates in PCH +2. **Template Specialization**: Replace general templates with specialized versions +3. **Template Depth Reduction**: Simplify template hierarchies + +### Long Term +1. **Architectural Review**: Evaluate necessity of deep template metaprogramming +2. 
**C++20 Concepts**: Earlier constraint checking, fewer instantiations +3. **Build Caching**: Distributed build cache for template instantiations + +## Detailed Statistics + +- **Total Unique Templates:** {{ unique_families }} +- **Total Instantiations:** {{ total_instantiations|format_number }} +{% if total_instantiations > 0 -%} +- **Average Instantiation Time:** {{ "%.3f"|format(total_template_time/total_instantiations) }}ms +{% endif -%} +{% if unique_families > 0 -%} +- **Median Template Family Count:** {{ median_count }} +{% endif %} + +--- + +*Report generated using Clang -ftime-trace with {{ granularity }}µs granularity* +*Analysis tool: ck-build-analysis*