mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-30 03:37:38 +00:00
Performance analysis script.
This commit is contained in:
213
script/perf-analysis.py
Executable file
213
script/perf-analysis.py
Executable file
@@ -0,0 +1,213 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import sys
|
||||
import pandas as pd
|
||||
import csv
|
||||
import matplotlib
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
|
||||
matplotlib.use('Agg') # Use a non-interactive backend
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
def parse_cli_args(argv=None):
    """Parse the command line arguments of the performance analysis script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``perf_file``,
        ``baseline_perf_file``, ``output_dir`` and ``kernel``.

    Raises:
        SystemExit: With status 1 when unrecognized arguments are passed;
            argparse itself exits when a required option is missing.
    """
    parser = argparse.ArgumentParser(description="Analyze convolution test results.")
    parser.add_argument("--perf-file", type=str, required=True,
                        help="Path to the perf results.")
    parser.add_argument("--baseline-perf-file", type=str, required=True,
                        help="Path to the baseline perf results.")
    parser.add_argument("--output-dir", type=str, required=True,
                        help="Directory to save output plots.")
    # No default is declared: argparse never consults the default of a
    # required option, so one would only be misleading.
    parser.add_argument("--kernel", type=str, required=True,
                        help="Kernel for which the performance is studied.")

    # parse_known_args lets unknown arguments be reported with this script's
    # own message and exit status instead of argparse's usage error.
    args, unknown_args = parser.parse_known_args(argv)
    if unknown_args:
        print(f"Unknown arguments: {unknown_args}", file=sys.stderr)
        sys.exit(1)

    return args
|
||||
|
||||
def extract_perf_data(line):
    """Extract the TFlops value and the kernel name from one result line.

    The TFlops value is the number that directly precedes the word
    ``TFlops``; plain decimals (``12.5``) and scientific notation
    (``9.5e-05``) are both accepted. The kernel name is the text found
    between ``GB/s, `` and the next ``, SplitK``.

    Args:
        line: One line of text to search.

    Returns:
        Tuple ``(tflops, kernel_name)`` holding a float and a stripped
        string, or ``(None, None)`` when either part is missing from
        ``line``.
    """
    import re

    # Mantissa with an optional exponent. Matching the exponent matters:
    # without it "9.5e-05 TFlops" would be read as "05", i.e. 5.0.
    number = r'(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?'
    tflops_match = re.search(r'(' + number + r')\s+TFlops', line)
    if not tflops_match:
        return None, None

    # The lazy ``.*?`` extends until the first ", SplitK", so commas inside
    # the kernel's template argument list stay part of the captured name.
    kernel_pattern = r'GB/s,\s+(.*?),\s+SplitK(?:\s+\-?\d+)?'
    kernel_match = re.search(kernel_pattern, line)
    if not kernel_match:
        return None, None

    return float(tflops_match.group(1)), kernel_match.group(1).strip()
|
||||
|
||||
def get_tflops_per_kernel(file):
    """Collect the TFlops measurements of every kernel found in a result file.

    Every non-blank line is parsed with ``extract_perf_data`` and its TFlops
    value is appended to the list of the kernel named on that line, so each
    list keeps the order in which the measurements appear in the file.

    Args:
        file: Path of the text file to read.

    Returns:
        ``defaultdict(list)`` mapping kernel name to its list of TFlops
        values.

    Raises:
        ValueError: If a non-blank line does not yield both a TFlops value
            and a kernel name.
    """
    res = defaultdict(list)
    with open(file, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no measurement; skip them.
                continue

            tflops, kernel_name = extract_perf_data(line)
            # Raise instead of assert: asserts are removed under ``python -O``
            # and a malformed line would then be stored under the key None.
            if tflops is None or kernel_name is None:
                raise ValueError(
                    f"Failed to extract TFlops and kernel name from line "
                    f"{line_number} of {file}: {stripped}"
                )
            res[kernel_name].append(tflops)

    return res
|
||||
|
||||
def filter_by_kernel(perf_results, kernel):
    """Select the performance results that belong to one kernel.

    A key matches when the part of it that precedes its first ``<`` equals
    ``kernel``. A falsy ``kernel`` disables filtering, in which case
    ``perf_results`` itself is returned.
    """
    if not kernel:
        return perf_results

    selected = {}
    for name, values in perf_results.items():
        base_name, _, _ = name.partition('<')
        if base_name == kernel:
            selected[name] = values
    return selected
|
||||
|
||||
def plot_perf_difference(perf_difference, output_dir, label="",
                         title="Performance of packed cast vs. baseline elementwise",
                         bin_width=1):
    """Plot performance values as a histogram with summary statistics.

    Values below 100 are drawn in red and values of 100 or more in green,
    with a vertical line marking 100. Each non-empty bin is annotated with
    its count and a text box lists summary statistics. The figure is saved
    as ``performance_<label>.png`` inside ``output_dir``.

    Args:
        perf_difference: Non-empty sequence of numbers to plot.
        output_dir: Existing directory in which the image is written.
        label: Suffix used in the name of the output file.
        title: Title drawn above the plot.
        bin_width: Width of one histogram bin; must be positive.

    Raises:
        ValueError: If ``perf_difference`` is empty or ``bin_width`` is not
            positive.
    """
    count = len(perf_difference)
    if count == 0:
        raise ValueError("No performance values to plot.")
    if bin_width <= 0:
        raise ValueError(f"bin_width must be positive, got {bin_width}")

    mean_val = np.mean(perf_difference)
    median_val = np.median(perf_difference)
    std_val = np.std(perf_difference)
    min_val = np.min(perf_difference)
    max_val = np.max(perf_difference)
    p25 = np.percentile(perf_difference, 25)
    p75 = np.percentile(perf_difference, 75)

    # Snap the plotted range outwards to multiples of the bin width.
    min_edge = np.floor(min_val / bin_width) * bin_width
    max_edge = np.ceil(max_val / bin_width) * bin_width
    if max_edge <= min_edge:
        # All values sit on one bin boundary (e.g. every value is exactly
        # 100); widen the range so that at least one bin exists.
        max_edge = min_edge + bin_width
    bin_edges = np.arange(min_edge, max_edge + bin_width, bin_width)

    below_100 = [x for x in perf_difference if x < 100]
    above_100 = [x for x in perf_difference if x >= 100]
    below_percent = 100 * len(below_100) / count
    above_percent = 100 * len(above_100) / count

    plt.figure(figsize=(12, 6))
    try:
        # Legend labels are attached to the artists themselves so that each
        # entry stays with its own histogram even when one group is empty
        # and therefore not drawn.
        if below_100:
            plt.hist(below_100, bins=bin_edges, color='red',
                     alpha=0.7, edgecolor='black',
                     label=f'Below 100% ({len(below_100)}, {below_percent:.1f}%)')
        if above_100:
            plt.hist(above_100, bins=bin_edges, color='green',
                     alpha=0.7, edgecolor='black',
                     label=f'Above 100% ({len(above_100)}, {above_percent:.1f}%)')

        # Annotate every non-empty bin with the number of values it holds.
        total_counts, _ = np.histogram(perf_difference, bins=bin_edges)
        for left, right, bin_count in zip(bin_edges[:-1], bin_edges[1:], total_counts):
            if bin_count > 0:
                plt.text(
                    (left + right) / 2,
                    bin_count + 0.5,
                    f'{int(bin_count)}',
                    ha='center',
                    va='bottom',
                    fontweight='bold',
                    fontsize=9,
                )

        stats_text = (f"Statistics:\n"
                      f"Count: {count}\n"
                      f"Mean: {mean_val:.2f}%\n"
                      f"Median: {median_val:.2f}%\n"
                      f"Std Dev: {std_val:.2f}%\n"
                      f"Min: {min_val:.2f}%\n"
                      f"Max: {max_val:.2f}%\n"
                      f"25th Percentile: {p25:.2f}%\n"
                      f"75th Percentile: {p75:.2f}%")

        plt.title(title, fontsize=12, fontweight='bold')
        plt.xlabel('Performance (%)', fontsize=12)
        plt.ylabel('Count', fontsize=12)
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.xticks(bin_edges)
        plt.text(0.02, 0.97, stats_text, transform=plt.gca().transAxes, fontsize=10,
                 verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
        plt.axvline(x=100, color='black', linestyle='--', alpha=0.9, linewidth=2,
                    label='100% Threshold')

        legend = plt.legend()
        legend.set_bbox_to_anchor((0.225, 0.65))

        plt.tight_layout()

        file_name = os.path.join(output_dir, f'performance_{label}.png')
        plt.savefig(file_name, dpi=150)
        print(f"Saved performance chart to: {file_name}")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close()
|
||||
|
||||
def plot_perf(perf, baseline_perf, kernel, output_dir):
    """Plot the performance of the current results relative to the baseline.

    For every kernel in ``perf`` the measurements are paired by position with
    the baseline measurements of the same kernel, and each pair is expressed
    as ``100 * current / baseline``. The collected percentages are passed to
    ``plot_perf_difference`` with ``kernel`` as the label.

    Args:
        perf: Mapping of kernel name to list of current measurements.
        baseline_perf: Mapping of kernel name to list of baseline
            measurements.
        kernel: Label forwarded to ``plot_perf_difference``.
        output_dir: Output directory forwarded to ``plot_perf_difference``.

    Raises:
        ValueError: If a kernel of ``perf`` is missing from ``baseline_perf``,
            if the two measurement lists of a kernel differ in length, or if
            a baseline measurement is zero.
    """
    perf_difference = []
    for k, perf_list in perf.items():
        if k not in baseline_perf:
            raise ValueError(f"Kernel {k} not found in baseline performance data.")

        baseline_perf_list = baseline_perf[k]
        if len(perf_list) != len(baseline_perf_list):
            raise ValueError(f"Performance lists for kernel {k} have different lengths: {len(perf_list)} vs {len(baseline_perf_list)}")

        for i, (current, baseline) in enumerate(zip(perf_list, baseline_perf_list)):
            if baseline == 0:
                # Report which measurement is unusable instead of letting a
                # bare ZeroDivisionError escape without any context.
                raise ValueError(
                    f"Baseline performance of kernel {k} is zero at index {i}; "
                    f"cannot compute a relative performance."
                )
            perf_difference.append(100.0 * (current / baseline))

    plot_perf_difference(perf_difference, output_dir, label=kernel)
|
||||
|
||||
def main():
    """Run the analysis: parse both result files, filter them, and plot.

    Reads the current and the baseline result files named on the command
    line, keeps the entries of the requested kernel, creates the output
    directory when needed, and writes the comparison plot into it.
    """
    args = parse_cli_args()

    perf = get_tflops_per_kernel(args.perf_file)
    baseline_perf = get_tflops_per_kernel(args.baseline_perf_file)

    print(f"Found in total {len(perf)} different kernels in perf file.")
    print(f"Found in total {len(baseline_perf)} different kernels in baseline perf file.")

    kernel_perf = filter_by_kernel(perf, args.kernel)
    baseline_kernel_perf = filter_by_kernel(baseline_perf, args.kernel)

    print(f"Found {len(kernel_perf)} instances of {args.kernel} in perf file.")
    print(f"Found {len(baseline_kernel_perf)} instances of {args.kernel} in baseline perf file.")

    # exist_ok avoids the check-then-create race of testing for the directory
    # first, and fails early if the path exists but is not a directory.
    os.makedirs(args.output_dir, exist_ok=True)

    plot_perf(kernel_perf, baseline_kernel_perf, args.kernel, args.output_dir)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user