mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-30 11:47:48 +00:00
Refactor report generation to use Jinja2 templates
- Add Jinja2 template for report generation (.claude/skills/templates/build_analysis_report.md.jinja)
- Refactor analysis script to use template rendering instead of string concatenation
- Add custom Jinja2 filters for formatting (format_number, truncate, pad)
- Separate presentation from logic for better maintainability
- Template makes report format easier to modify and extend

Requirements:
- python3-jinja2 must be installed in Docker container (apt-get install python3-jinja2)

Benefits:
- Cleaner code with separation of concerns
- Easier to customize report format
- Better readability and maintainability

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -183,9 +183,10 @@ import re
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
# The script reads six positional arguments (sys.argv[1] through sys.argv[6]),
# so anything shorter than seven entries (script name included) is a usage
# error; checking for fewer would let the later indexing raise IndexError.
if len(sys.argv) < 7:
    print("Usage: analyze.py <trace_file> <output_file> <target> <granularity> <build_time> <template_dir>")
    sys.exit(1)
|
||||
|
||||
trace_file = sys.argv[1]
|
||||
@@ -193,6 +194,7 @@ output_file = sys.argv[2]
|
||||
target = sys.argv[3]
|
||||
granularity = sys.argv[4]
|
||||
build_time = sys.argv[5]
|
||||
template_dir = sys.argv[6]
|
||||
|
||||
print(f'Loading trace file: {trace_file}')
|
||||
with open(trace_file, 'r') as f:
|
||||
@@ -223,8 +225,7 @@ for event in data.get('traceEvents', []):
|
||||
template_stats[template_name]['count'] += 1
|
||||
template_stats[template_name]['total_dur'] += dur
|
||||
|
||||
print('Sorting and generating report...')
|
||||
sorted_templates = sorted(template_stats.items(), key=lambda x: x[1]['total_dur'], reverse=True)
|
||||
print('Sorting data...')
|
||||
sorted_phases = sorted(phase_stats.items(), key=lambda x: x[1], reverse=True)
|
||||
top_individual.sort(key=lambda x: x['dur'], reverse=True)
|
||||
|
||||
@@ -233,126 +234,101 @@ total_trace_time = sum(phase_stats.values())
|
||||
total_events = len(data.get('traceEvents', []))
|
||||
total_inst = sum(s['count'] for s in template_stats.values())
|
||||
|
||||
report = []
|
||||
report.append('# Composable Kernel Build Time Analysis Report')
|
||||
report.append('')
|
||||
report.append(f'**Generated:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
|
||||
report.append(f'**Target:** {target}')
|
||||
report.append(f'**Granularity:** {granularity}µs')
|
||||
report.append('')
|
||||
report.append('## Executive Summary')
|
||||
report.append('')
|
||||
report.append(f'- **Wall Clock Time:** {build_time} seconds')
|
||||
report.append(f'- **Trace Time:** {total_trace_time/1000:.1f} seconds')
|
||||
report.append(f'- **Template Instantiation Time:** {total_template_time/1000:.1f} seconds ({100*total_template_time/total_trace_time:.1f}% of trace)')
|
||||
report.append(f'- **Total Events Captured:** {total_events:,}')
|
||||
report.append(f'- **Total Template Instantiations:** {total_inst:,}')
|
||||
report.append(f'- **Unique Template Families:** {len(sorted_templates)}')
|
||||
report.append('')
|
||||
report.append('## Compilation Phase Breakdown')
|
||||
report.append('')
|
||||
report.append('| Phase | Time (ms) | Time (s) | % of Total |')
|
||||
report.append('|-------|-----------|----------|------------|')
|
||||
for phase, dur in sorted_phases[:20]:
|
||||
pct = 100 * dur / total_trace_time
|
||||
report.append(f'| {phase:<40} | {dur:>9.2f} | {dur/1000:>8.2f} | {pct:>9.1f}% |')
|
||||
report.append('')
|
||||
report.append('## Top 30 Most Expensive Individual Instantiations')
|
||||
report.append('')
|
||||
report.append('| Rank | Template | Type | Time (ms) |')
|
||||
report.append('|------|----------|------|-----------|')
|
||||
for i, inst in enumerate(top_individual[:30], 1):
|
||||
detail = inst['detail'][:70] + '...' if len(inst['detail']) > 70 else inst['detail']
|
||||
inst_type = 'Func' if inst['type'] == 'InstantiateFunction' else 'Class'
|
||||
report.append(f'| {i:>4} | {detail:<70} | {inst_type:<5} | {inst["dur"]:>9.2f} |')
|
||||
report.append('')
|
||||
report.append('## Template Families by Total Time (Top 50)')
|
||||
report.append('')
|
||||
report.append('| Rank | Template Family | Count | Total (ms) | Avg (ms) | % of Total |')
|
||||
report.append('|------|-----------------|-------|------------|----------|------------|')
|
||||
for i, (name, stats) in enumerate(sorted_templates[:50], 1):
|
||||
count = stats['count']
|
||||
total = stats['total_dur']
|
||||
avg = total / count if count > 0 else 0
|
||||
pct = 100 * total / total_template_time if total_template_time > 0 else 0
|
||||
display_name = name[:40] + '...' if len(name) > 40 else name
|
||||
report.append(f'| {i:>4} | {display_name:<43} | {count:>5} | {total:>10.2f} | {avg:>8.2f} | {pct:>9.1f}% |')
|
||||
report.append('')
|
||||
report.append('## Template Families by Instantiation Count (Top 50)')
|
||||
report.append('')
|
||||
sorted_by_count = sorted(template_stats.items(), key=lambda x: x[1]['count'], reverse=True)
|
||||
report.append('| Rank | Template Family | Count | Total (ms) | Avg (ms) |')
|
||||
report.append('|------|-----------------|-------|------------|----------|')
|
||||
for i, (name, stats) in enumerate(sorted_by_count[:50], 1):
|
||||
count = stats['count']
|
||||
total = stats['total_dur']
|
||||
avg = total / count if count > 0 else 0
|
||||
display_name = name[:40] + '...' if len(name) > 40 else name
|
||||
report.append(f'| {i:>4} | {display_name:<43} | {count:>5} | {total:>10.2f} | {avg:>8.2f} |')
|
||||
report.append('')
|
||||
report.append('## Key Insights')
|
||||
report.append('')
|
||||
report.append('### 1. Template Instantiation Impact')
|
||||
report.append(f'- Template instantiation accounts for {100*total_template_time/total_trace_time:.1f}% of total trace time')
|
||||
if len(sorted_templates) >= 10:
|
||||
top10_pct = 100*sum(s[1]["total_dur"] for s in sorted_templates[:10])/total_template_time
|
||||
report.append(f'- Top 10 template families account for {top10_pct:.1f}% of instantiation time')
|
||||
report.append('')
|
||||
report.append('### 2. Most Expensive Templates')
|
||||
if len(sorted_templates) > 0:
|
||||
report.append(f'- **{sorted_templates[0][0]}**: {sorted_templates[0][1]["count"]:,} instantiations, {sorted_templates[0][1]["total_dur"]/1000:.2f}s total')
|
||||
if len(sorted_templates) > 1:
|
||||
avg = sorted_templates[1][1]["total_dur"] / sorted_templates[1][1]["count"]
|
||||
report.append(f'- **{sorted_templates[1][0]}**: {sorted_templates[1][1]["count"]:,} instantiations, {avg:.2f}ms average')
|
||||
report.append('')
|
||||
report.append('## Optimization Recommendations')
|
||||
report.append('')
|
||||
report.append('### Short Term')
|
||||
report.append('1. **Focus on High-Impact Templates**: Address top 10 families first')
|
||||
report.append('2. **Explicit Template Instantiation**: Pre-instantiate common configurations')
|
||||
report.append('3. **Extern Templates**: Mark frequently-used templates as extern in headers')
|
||||
report.append('')
|
||||
report.append('### Medium Term')
|
||||
report.append('1. **Precompiled Headers**: Include heavy templates in PCH')
|
||||
report.append('2. **Template Specialization**: Replace general templates with specialized versions')
|
||||
report.append('3. **Template Depth Reduction**: Simplify template hierarchies')
|
||||
report.append('')
|
||||
report.append('### Long Term')
|
||||
report.append('1. **Architectural Review**: Evaluate necessity of deep template metaprogramming')
|
||||
report.append('2. **C++20 Concepts**: Earlier constraint checking, fewer instantiations')
|
||||
report.append('3. **Build Caching**: Distributed build cache for template instantiations')
|
||||
report.append('')
|
||||
report.append('## Detailed Statistics')
|
||||
report.append('')
|
||||
report.append(f'- **Total Unique Templates:** {len(sorted_templates)}')
|
||||
report.append(f'- **Total Instantiations:** {total_inst:,}')
|
||||
if total_inst > 0:
|
||||
report.append(f'- **Average Instantiation Time:** {total_template_time/total_inst:.3f}ms')
|
||||
# Template families ranked by cumulative instantiation time (largest first).
# Each entry is (family name, dict) where the dict carries the raw count and
# total duration plus the derived average and share of all template time;
# both derived values fall back to 0 when their divisor is zero.
templates_by_time = [
    (
        family,
        {
            'count': info['count'],
            'total_dur': info['total_dur'],
            'avg': info['total_dur'] / info['count'] if info['count'] > 0 else 0,
            'pct': 100 * info['total_dur'] / total_template_time if total_template_time > 0 else 0,
        },
    )
    for family, info in sorted(
        template_stats.items(),
        key=lambda kv: kv[1]['total_dur'],
        reverse=True,
    )
]
|
||||
|
||||
# Template families ranked by number of instantiations (largest first).
# Each entry is (family name, dict) with the raw count, the total duration,
# and the average duration per instantiation (0 when the count is zero).
templates_by_count = [
    (
        family,
        {
            'count': info['count'],
            'total_dur': info['total_dur'],
            'avg': info['total_dur'] / info['count'] if info['count'] > 0 else 0,
        },
    )
    for family, info in sorted(
        template_stats.items(),
        key=lambda kv: kv[1]['count'],
        reverse=True,
    )
]
|
||||
|
||||
# Tag every individual instantiation record, in place, with a short label:
# 'Func' for InstantiateFunction events and 'Class' for everything else.
for entry in top_individual:
    if entry['type'] == 'InstantiateFunction':
        entry['inst_type'] = 'Func'
    else:
        entry['inst_type'] = 'Class'
|
||||
|
||||
# Middle element of the sorted per-family instantiation counts (the upper
# of the two middle values when the number of families is even); 0 when no
# template families were recorded.
family_counts = sorted(s["count"] for s in template_stats.values())
median_count = family_counts[len(family_counts) // 2] if family_counts else 0
|
||||
report.append(f'- **Median Template Family Count:** {median_count}')
|
||||
report.append('')
|
||||
report.append('---')
|
||||
report.append('')
|
||||
report.append(f'*Report generated using Clang -ftime-trace with {granularity}µs granularity*')
|
||||
report.append(f'*Analysis tool: ck-build-analysis*')
|
||||
|
||||
# Share of total instantiation time taken by the first ten entries of
# templates_by_time. Stays 0 when fewer than ten families exist, or when no
# instantiation time was recorded at all (dividing by a zero total would
# otherwise raise ZeroDivisionError).
top10_pct = 0
if len(templates_by_time) >= 10 and total_template_time > 0:
    top10_pct = 100 * sum(entry[1]["total_dur"] for entry in templates_by_time[:10]) / total_template_time
|
||||
|
||||
print('Rendering report with Jinja2...')
|
||||
# Set up Jinja2 environment with custom filters
|
||||
env = Environment(loader=FileSystemLoader(template_dir))
|
||||
|
||||
def format_number(value):
    """Return ``value`` rendered with ``,`` as the thousands separator."""
    return format(value, ',')
|
||||
|
||||
def truncate(value, length):
    """Shorten ``value`` so the result is at most ``length`` characters.

    Strings that already fit are returned unchanged. Longer strings are cut
    and end in ``'...'``, with the ellipsis counted toward ``length``. When
    ``length`` is smaller than the ellipsis itself, the string is simply cut
    to ``length`` characters (empty for zero or negative lengths).

    Note: this script registers the function under the filter name
    ``truncate``, which replaces Jinja2's built-in filter of the same name
    for this environment.
    """
    if len(value) <= length:
        return value
    if length < 3:
        # No room for a full ellipsis. Without this guard the slice bound
        # ``length - 3`` goes negative and keeps most of the string, so the
        # result would be longer than requested.
        return value[:max(length, 0)]
    return value[:length - 3] + '...'
|
||||
|
||||
def pad(value, length):
    """Left-align ``value`` in a field ``length`` wide, filling with spaces.

    Values already at least ``length`` wide are returned without padding
    and are never shortened.
    """
    return format(value, f'<{length}')
|
||||
|
||||
# Expose the formatting helpers to templates under their filter names.
env.filters.update(
    format_number=format_number,
    truncate=truncate,
    pad=pad,
)
|
||||
|
||||
# Load and render template
|
||||
template = env.get_template('build_analysis_report.md.jinja')
|
||||
# Render the report. Values the template prints verbatim (seconds and
# percentages) are pre-formatted here as strings; the raw totals are passed
# as well because the template computes per-row figures from them.
report_content = template.render(
    timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    target=target,
    granularity=granularity,
    build_time=build_time,
    trace_time_sec=f'{total_trace_time/1000:.1f}',
    template_time_sec=f'{total_template_time/1000:.1f}',
    # Guard the division: a trace with no recorded time would otherwise
    # raise ZeroDivisionError in this expression.
    template_pct=f'{(100*total_template_time/total_trace_time if total_trace_time > 0 else 0):.1f}',
    total_events=total_events,
    total_instantiations=total_inst,
    unique_families=len(template_stats),
    total_trace_time=total_trace_time,
    total_template_time=total_template_time,
    phases=sorted_phases,
    top_individual=top_individual,
    templates_by_time=templates_by_time,
    templates_by_count=templates_by_count,
    median_count=median_count,
    top10_pct=f'{top10_pct:.1f}'
)
|
||||
|
||||
with open(output_file, 'w') as f:
|
||||
f.write('\n'.join(report))
|
||||
f.write(report_content)
|
||||
|
||||
print(f'Report generated: {output_file}')
|
||||
print(f'Total lines: {len(report)}')
|
||||
print(f'Report size: {len(report_content)} bytes')
|
||||
PYSCRIPT
|
||||
|
||||
# Copy analysis script to container and run it
|
||||
# Copy analysis script and templates to container
|
||||
docker cp "${ANALYSIS_SCRIPT}" "${CONTAINER_NAME}:/tmp/analyze.py"
|
||||
docker cp "${SCRIPT_DIR}/templates" "${CONTAINER_NAME}:/tmp/ck_build_analysis_templates"
|
||||
|
||||
docker exec "${CONTAINER_NAME}" python3 /tmp/analyze.py \
|
||||
"${TRACE_FILE}" \
|
||||
"/workspace/${OUTPUT_FILE}" \
|
||||
"${TARGET}" \
|
||||
"${GRANULARITY}" \
|
||||
"${BUILD_TIME}"
|
||||
"${BUILD_TIME}" \
|
||||
"/tmp/ck_build_analysis_templates"
|
||||
|
||||
# Copy report back to host
|
||||
docker cp "${CONTAINER_NAME}:/workspace/${OUTPUT_FILE}" "${PROJECT_ROOT}/${OUTPUT_FILE}"
|
||||
@@ -360,6 +336,7 @@ docker cp "${CONTAINER_NAME}:/workspace/${OUTPUT_FILE}" "${PROJECT_ROOT}/${OUTPU
|
||||
# Cleanup
|
||||
rm -f "${ANALYSIS_SCRIPT}"
|
||||
docker exec "${CONTAINER_NAME}" rm -f /tmp/analyze.py
|
||||
docker exec "${CONTAINER_NAME}" rm -rf /tmp/ck_build_analysis_templates
|
||||
|
||||
echo ""
|
||||
echo "═══════════════════════════════════════════════════════════════"
|
||||
|
||||
95
.claude/skills/templates/build_analysis_report.md.jinja
Normal file
95
.claude/skills/templates/build_analysis_report.md.jinja
Normal file
@@ -0,0 +1,95 @@
|
||||
# Composable Kernel Build Time Analysis Report
|
||||
|
||||
**Generated:** {{ timestamp }}
|
||||
**Target:** {{ target }}
|
||||
**Granularity:** {{ granularity }}µs
|
||||
|
||||
## Executive Summary
|
||||
|
||||
- **Wall Clock Time:** {{ build_time }} seconds
|
||||
- **Trace Time:** {{ trace_time_sec }} seconds
|
||||
- **Template Instantiation Time:** {{ template_time_sec }} seconds ({{ template_pct }}% of trace)
|
||||
- **Total Events Captured:** {{ total_events|format_number }}
|
||||
- **Total Template Instantiations:** {{ total_instantiations|format_number }}
|
||||
- **Unique Template Families:** {{ unique_families }}
|
||||
|
||||
## Compilation Phase Breakdown
|
||||
|
||||
| Phase | Time (ms) | Time (s) | % of Total |
|
||||
|-------|-----------|----------|------------|
|
||||
{% for phase, dur in phases[:20] -%}
|
||||
| {{ phase|pad(40) }} | {{ "%9.2f"|format(dur) }} | {{ "%8.2f"|format(dur/1000) }} | {{ "%9.1f"|format(100 * dur / total_trace_time) }}% |
|
||||
{% endfor %}
|
||||
|
||||
## Top 30 Most Expensive Individual Instantiations
|
||||
|
||||
| Rank | Template | Type | Time (ms) |
|
||||
|------|----------|------|-----------|
|
||||
{% for inst in top_individual[:30] -%}
|
||||
| {{ "%4d"|format(loop.index) }} | {{ inst.detail|truncate(70) }} | {{ inst.inst_type|pad(5) }} | {{ "%9.2f"|format(inst.dur) }} |
|
||||
{% endfor %}
|
||||
|
||||
## Template Families by Total Time (Top 50)
|
||||
|
||||
| Rank | Template Family | Count | Total (ms) | Avg (ms) | % of Total |
|
||||
|------|-----------------|-------|------------|----------|------------|
|
||||
{% for name, stats in templates_by_time[:50] -%}
|
||||
| {{ "%4d"|format(loop.index) }} | {{ name|truncate(43)|pad(43) }} | {{ "%5d"|format(stats.count) }} | {{ "%10.2f"|format(stats.total_dur) }} | {{ "%8.2f"|format(stats.avg) }} | {{ "%9.1f"|format(stats.pct) }}% |
|
||||
{% endfor %}
|
||||
|
||||
## Template Families by Instantiation Count (Top 50)
|
||||
|
||||
| Rank | Template Family | Count | Total (ms) | Avg (ms) |
|
||||
|------|-----------------|-------|------------|----------|
|
||||
{% for name, stats in templates_by_count[:50] -%}
|
||||
| {{ "%4d"|format(loop.index) }} | {{ name|truncate(43)|pad(43) }} | {{ "%5d"|format(stats.count) }} | {{ "%10.2f"|format(stats.total_dur) }} | {{ "%8.2f"|format(stats.avg) }} |
|
||||
{% endfor %}
|
||||
|
||||
## Key Insights
|
||||
|
||||
### 1. Template Instantiation Impact
|
||||
- Template instantiation accounts for {{ template_pct }}% of total trace time
|
||||
{% if unique_families >= 10 -%}
|
||||
- Top 10 template families account for {{ top10_pct }}% of instantiation time
|
||||
{% endif %}
|
||||
|
||||
### 2. Most Expensive Templates
|
||||
{% if templates_by_time|length > 0 -%}
|
||||
- **{{ templates_by_time[0][0] }}**: {{ templates_by_time[0][1].count|format_number }} instantiations, {{ "%.2f"|format(templates_by_time[0][1].total_dur/1000) }}s total
|
||||
{% endif -%}
|
||||
{% if templates_by_time|length > 1 -%}
|
||||
- **{{ templates_by_time[1][0] }}**: {{ templates_by_time[1][1].count|format_number }} instantiations, {{ "%.2f"|format(templates_by_time[1][1].avg) }}ms average
|
||||
{% endif %}
|
||||
|
||||
## Optimization Recommendations
|
||||
|
||||
### Short Term
|
||||
1. **Focus on High-Impact Templates**: Address top 10 families first
|
||||
2. **Explicit Template Instantiation**: Pre-instantiate common configurations
|
||||
3. **Extern Templates**: Mark frequently-used templates as extern in headers
|
||||
|
||||
### Medium Term
|
||||
1. **Precompiled Headers**: Include heavy templates in PCH
|
||||
2. **Template Specialization**: Replace general templates with specialized versions
|
||||
3. **Template Depth Reduction**: Simplify template hierarchies
|
||||
|
||||
### Long Term
|
||||
1. **Architectural Review**: Evaluate necessity of deep template metaprogramming
|
||||
2. **C++20 Concepts**: Earlier constraint checking, fewer instantiations
|
||||
3. **Build Caching**: Distributed build cache for template instantiations
|
||||
|
||||
## Detailed Statistics
|
||||
|
||||
- **Total Unique Templates:** {{ unique_families }}
|
||||
- **Total Instantiations:** {{ total_instantiations|format_number }}
|
||||
{% if total_instantiations > 0 -%}
|
||||
- **Average Instantiation Time:** {{ "%.3f"|format(total_template_time/total_instantiations) }}ms
|
||||
{% endif -%}
|
||||
{% if unique_families > 0 -%}
|
||||
- **Median Template Family Count:** {{ median_count }}
|
||||
{% endif %}
|
||||
|
||||
---
|
||||
|
||||
*Report generated using Clang -ftime-trace with {{ granularity }}µs granularity*
|
||||
*Analysis tool: ck-build-analysis*
|
||||
Reference in New Issue
Block a user