Script improvements.

This commit is contained in:
Ville Pietilä
2025-07-10 15:41:51 +00:00
parent bac43239ff
commit 66e4ee4962
2 changed files with 122 additions and 5 deletions

View File

@@ -276,7 +276,7 @@ def plot_best_split_k_values(standard_counts, optimized_count,
plt.close()
def plot_perf(perf_difference, output_dir, suffix="", op_name=""):
def plot_perf(perf_difference, output_dir, suffix="", op_name="", label=""):
"""Plot the performance differences as a histogram with statistics."""
import numpy as np
@@ -369,7 +369,7 @@ def plot_perf(perf_difference, output_dir, suffix="", op_name=""):
plt.tight_layout()
file_name = os.path.join(output_dir, f'performance{suffix}.png')
file_name = os.path.join(output_dir, f'performance{suffix}{label}.png')
plt.savefig(file_name, dpi=150)
print(f"Saved performance chart to: {file_name}")
@@ -757,10 +757,10 @@ def get_statistics(fixed_split_k_values, fixed_split_k_times, fixed_split_k_ops,
elif best_occ_split_k_time < fixed_split_k_time and best_occ_split_k_time > tol:
best_occupancy_split_k_count += 1
perf_change.append(perf)
perf_change.append(min(150.0, perf)) # Cap to 150% to make visualization better.
elif best_occ_split_k_time > fixed_split_k_time and fixed_split_k_time > tol:
fixed_split_k_counts[fixed_split_k_value] += 1
perf_change.append(perf)
perf_change.append(min(150.0, perf)) # Cap to 150% to make visualization better.
if best_occ_split_k_time < tol and fixed_split_k_time > tol:
print(f"WARNING: Optimized time is very small for row {i}. Split-K (opt): {best_occ_split_k_value}, Split-K (standard): {fixed_split_k_value}")
@@ -771,6 +771,56 @@ def get_statistics(fixed_split_k_values, fixed_split_k_times, fixed_split_k_ops,
return perf_change, fixed_split_k_counts, fixed_equal_best_occupancy_counts, best_occupancy_split_k_count, non_standard_indices
def plot_perf_for_all_solvers(solvers_per_conv_shape, output_dir, suffix, op_name):
    """Plot performance and ranking charts across every solver instance.

    Two charts are produced:

    * a performance histogram, delegated to ``plot_perf`` with the label
      ``"-all_instances"``;
    * a histogram of the rank values, saved as
      ``ranking_distribution{suffix}.png`` in ``output_dir``.

    Args:
        solvers_per_conv_shape: Mapping whose values are lists of 5-tuples
            ``(_, fixed_split_k_tflops, _, best_occ_split_k_tflops, rank)``.
            The keys and the first and third tuple elements are not used.
        output_dir: Directory the ranking chart is written to (also passed
            on to ``plot_perf``).
        suffix: Text appended to the output file names.
        op_name: Chart title when non-empty; otherwise a generic title is
            used.

    Returns:
        None. When no solver tuples are present, a warning is printed and
        nothing is plotted.
    """
    perf_cap = 150.0  # Cap to 150% to make visualization better.

    perf_difference = []
    ranking = []
    for values in solvers_per_conv_shape.values():
        if not values:
            continue
        for _, fixed_split_k_tflops, _, best_occ_split_k_tflops, rank in values:
            # Relative performance in percent; 0.0 when the baseline is not positive.
            if fixed_split_k_tflops > 0:
                perf_diff = (best_occ_split_k_tflops / fixed_split_k_tflops) * 100.0
            else:
                perf_diff = 0.0
            perf_difference.append(min(perf_cap, perf_diff))
            ranking.append(rank)

    # max() on an empty sequence raises ValueError, so bail out before plotting.
    if not ranking:
        print("WARNING: No solver instances found; skipping all-instances plots.")
        return

    plot_perf(perf_difference, output_dir, suffix=suffix, op_name=op_name, label="-all_instances")

    # Create a bar chart for the ranking distribution
    title = op_name if op_name else "Ranking Distribution of All Instances"
    title_size = 14 if op_name else 16

    plt.figure(figsize=(10, 6))

    # One unit-wide bin per rank: edges 1, 2, ..., max_rank + 1.
    max_rank = max(ranking)
    bin_edges = range(1, max_rank + 2)

    # Create histogram
    counts, bins, _ = plt.hist(ranking, bins=bin_edges,
                               color='skyblue', edgecolor='black', alpha=0.7)

    # Calculate the center of each bin for x-ticks
    bin_centers = [bins[i] + (bins[i + 1] - bins[i]) / 2 for i in range(len(bins) - 1)]

    plt.title(title, fontsize=title_size, fontweight='bold')
    plt.xlabel('Rank', fontsize=12)
    plt.ylabel('Count', fontsize=12)

    # Add explanation text middle top
    y_loc = 0.9 * max(counts)
    explanation = "Candidate split-K values ['best occupancy', 1, 2, 4, 8, 16, 32, 64, 128].\n" \
                  "Ranking of 'best occupancy' value for each solver instance\n" \
                  "Rank 1 is the best, rank 2 is second best, etc."
    plt.text(2.5, y_loc, explanation)

    # Set x-ticks at the center of each bar
    plt.xticks(bin_centers, range(1, max_rank + 1))
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

    rank_distribution_path = os.path.join(output_dir, f'ranking_distribution{suffix}.png')
    plt.savefig(rank_distribution_path, dpi=150)
    # Release the figure once it is on disk so repeated calls do not accumulate open figures.
    plt.close()
    print(f"Saved ranking distribution chart to: {rank_distribution_path}")
def main():
args = parse_cli_args()
@@ -824,9 +874,72 @@ def main():
# 17 - strategy
# 18 - total number of candidate ops.
# Columns 19-30 are
# 19: op_name
# 20: fixed_split_k_time
# 21: fixed_split_k_tflops
# 22: fixed_split_k_value
# 23: rank_fixed_split_k
# 24: strategy (FixedSplitK)
# 25: best_occupancy_split_k_time
# 26: best_occupancy_split_k_tflops
# 27: best_occupancy_split_k_value
# 28: rank_best_occupancy_split_k
# 29: strategy (BestOccupancy)
# 30: total number of candidate values
# This repeats in blocks of size=12, i.e., the next 12 elements (31-42) have the same structure if they are not null.
# Collect these elements into a dictionary
# where each key is the profiler_command and the value is a list of tuples containing the values for each block.
solvers_per_conv_shape = defaultdict(list)
offset = 18
size = 12
for i in range(len(profiler_commands)):
profiler_command = profiler_commands.iloc[i]
#print(f"Processing profiler command: {profiler_command}, row: {i}")
if pd.isna(profiler_command):
continue
if profiler_command not in solvers_per_conv_shape:
solvers_per_conv_shape[profiler_command] = []
for j in range(0, len(df.columns) - size - offset, size):
op_name = df.iloc[i, offset + j + 1]
if pd.isna(op_name):
continue
try:
loc_fixed_split_k_time = float(df.iloc[i, offset + j + 2])
loc_fixed_split_k_tflops = float(df.iloc[i, offset + j + 3])
loc_fixed_split_k_value = int(df.iloc[i, offset + j + 4])
loc_rank_fixed_split_k = int(df.iloc[i, offset + j + 5])
loc_strategy_fixed_split_k = df.iloc[i, offset + j + 6]
loc_best_occupancy_split_k_time = float(df.iloc[i, offset + j + 7])
loc_best_occupancy_split_k_tflops = float(df.iloc[i, offset + j + 8])
loc_best_occupancy_split_k_value = int(df.iloc[i, offset + j + 9])
loc_rank_best_occupancy_split_k = int(df.iloc[i, offset + j + 10])
loc_strategy_best_occupancy_split_k = df.iloc[i, offset + j + 11]
loc_num_candidates = int(df.iloc[i, offset + j + 12])
assert loc_strategy_fixed_split_k == "SplitKStrategy::FixedSplitK", \
f"Expected strategy_fixed_split_k to be 'SplitKStrategy::FixedSplitK', got {loc_strategy_fixed_split_k}."
assert loc_strategy_best_occupancy_split_k == "SplitKStrategy::BestOccupancy", \
f"Expected strategy_best_occupancy_split_k to be 'SplitKStrategy::BestOccupancy', got {loc_strategy_best_occupancy_split_k}."
# Candidates: {-1, 1, 2, 4, 8, 16, 32, 64, 128}
# Sometimes a split-K value is incompatible with the V3 pipeline, so we may have fewer than 9 candidates.
assert loc_num_candidates <= 9 and loc_num_candidates > 1, \
f"Expected num_candidates to be 9, got {loc_num_candidates}."
assert loc_rank_best_occupancy_split_k >= 1 and loc_rank_best_occupancy_split_k <= 9, \
f"Expected rank_best_occupancy_split_k to be between 1 and 9, got {loc_rank_best_occupancy_split_k}."
solvers_per_conv_shape[profiler_command].append(
(loc_fixed_split_k_value, loc_fixed_split_k_tflops, loc_best_occupancy_split_k_value, loc_best_occupancy_split_k_tflops, loc_rank_best_occupancy_split_k))
except (ValueError, TypeError) as e:
print(f"Warning: Could not process row {i}, block {j}: {e}. Skipping this block.")
continue
op_name = fixed_split_k_ops.iloc[0].split("<")[0]
suffix = f"_{args.label}" if args.label else ""
plot_perf_for_all_solvers(solvers_per_conv_shape, args.output_dir, suffix, op_name)
G, N, K, C, Y, X, Ho, Wo = get_convolution_shapes(profiler_commands)
plot_tSNE_performance(G,N,K,C,Y,X,Ho,Wo, fixed_split_k_tflops.astype(float).values, best_occupancy_split_k_tflops.astype(float).values, args.output_dir, suffix, op_name)

View File

@@ -45,7 +45,11 @@ def run_ck_profiler_cmd(cmd, disabled_ops, run_id, log_to_stdout=False):
subprocess.run(cmd, env=env_vars)
else:
with open(os.devnull, 'w') as devnull:
subprocess.run(cmd, env=env_vars, stdout=devnull)
timeoutInSec = 300
try:
subprocess.run(cmd, env=env_vars, stdout=devnull, timeout=timeoutInSec)
except subprocess.TimeoutExpired:
print(f"Command '{cmd_str}' timed out after {timeoutInSec} seconds.", file=sys.stderr)
def get_profiler_commands(csv_file):
profiler_commands = []