From 66e4ee49626828ecb87e2ae8561600cafadd4a2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= <> Date: Thu, 10 Jul 2025 15:41:51 +0000 Subject: [PATCH] Script improvements. --- script/analyze_conv_tests.py | 121 +++++++++++++++++++++++++++++++++-- script/run_conv_profiler.py | 6 +- 2 files changed, 122 insertions(+), 5 deletions(-) diff --git a/script/analyze_conv_tests.py b/script/analyze_conv_tests.py index 19a3c48aaa..fd64a05a88 100644 --- a/script/analyze_conv_tests.py +++ b/script/analyze_conv_tests.py @@ -276,7 +276,7 @@ def plot_best_split_k_values(standard_counts, optimized_count, plt.close() -def plot_perf(perf_difference, output_dir, suffix="", op_name=""): +def plot_perf(perf_difference, output_dir, suffix="", op_name="", label=""): """Plot the performance differences as a histogram with statistics.""" import numpy as np @@ -369,7 +369,7 @@ def plot_perf(perf_difference, output_dir, suffix="", op_name=""): plt.tight_layout() - file_name = os.path.join(output_dir, f'performance{suffix}.png') + file_name = os.path.join(output_dir, f'performance{suffix}{label}.png') plt.savefig(file_name, dpi=150) print(f"Saved performance chart to: {file_name}") @@ -757,10 +757,10 @@ def get_statistics(fixed_split_k_values, fixed_split_k_times, fixed_split_k_ops, elif best_occ_split_k_time < fixed_split_k_time and best_occ_split_k_time > tol: best_occupancy_split_k_count += 1 - perf_change.append(perf) + perf_change.append(min(150.0, perf)) # Cap to 150% to make visualization better. elif best_occ_split_k_time > fixed_split_k_time and fixed_split_k_time > tol: fixed_split_k_counts[fixed_split_k_value] += 1 - perf_change.append(perf) + perf_change.append(min(150.0, perf)) # Cap to 150% to make visualization better. if best_occ_split_k_time < tol and fixed_split_k_time > tol: print(f"WARNING: Optimized time is very small for row {i}. 
Split-K (opt): {best_occ_split_k_value}, Split-K (standard): {fixed_split_k_value}") @@ -771,6 +771,56 @@ def get_statistics(fixed_split_k_values, fixed_split_k_times, fixed_split_k_ops, return perf_change, fixed_split_k_counts, fixed_equal_best_occupancy_counts, best_occupancy_split_k_count, non_standard_indices +def plot_perf_for_all_solvers(solvers_per_conv_shape, output_dir, suffix, op_name): + + perf_difference = [] + ranking = [] + for _, values in solvers_per_conv_shape.items(): + if not values: + continue + + for _, fixed_split_k_tflops, _, best_occ_split_k_tflops, rank in values: + perf_diff = (best_occ_split_k_tflops / fixed_split_k_tflops) * 100.0 if fixed_split_k_tflops > 0 else 0.0 + perf_difference.append(min(150.0, perf_diff)) + ranking.append(rank) + + plot_perf(perf_difference, output_dir, suffix=suffix, op_name=op_name, label="-all_instances") + + # Create a bar chart for the ranking distribution + title = op_name if op_name else "Ranking Distribution of All Instances" + title_size = 14 if op_name else 16 + plt.figure(figsize=(10, 6)) + + # Define the bin edges + bin_edges = range(1, max(ranking) + 2) + + # Create histogram + counts, bins, patches = plt.hist(ranking, bins=bin_edges, + color='skyblue', edgecolor='black', alpha=0.7) + + # Calculate the center of each bin for x-ticks + bin_centers = [bins[i] + (bins[i+1] - bins[i])/2 for i in range(len(bins)-1)] + + plt.title(title, fontsize=title_size, fontweight='bold') + plt.xlabel('Rank', fontsize=12) + plt.ylabel('Count', fontsize=12) + + # Add explanation text near the top of the plot + y_loc = 0.9*max(counts) + explanation = "Candidate split-K values ['best occupancy', 1, 2, 4, 8, 16, 32, 64, 128].\n" \ + "Ranking of 'best occupancy' value for each solver instance\n" \ + "Rank 1 is the best, rank 2 is second best, etc." 
+ plt.text(2.5, y_loc, explanation) + + # Set x-ticks at the center of each bar + plt.xticks(bin_centers, range(1, max(ranking) + 1)) + + plt.grid(True, linestyle='--', alpha=0.7) + plt.tight_layout() + rank_distribution_path = os.path.join(output_dir, f'ranking_distribution{suffix}.png') + plt.savefig(rank_distribution_path, dpi=150) + print(f"Saved ranking distribution chart to: {rank_distribution_path}") + def main(): args = parse_cli_args() @@ -824,9 +874,72 @@ def main(): # 17 - strategy # 18 - total number of candidate ops. + # Columns 19-30 are + # 19: op_name + # 20: fixed_split_k_time + # 21: fixed_split_k_tflops + # 22: fixed_split_k_value + # 23: rank_fixed_split_k + # 24: strategy (FixedSplitK) + # 25: best_occupancy_split_k_time + # 26: best_occupancy_split_k_tflops + # 27: best_occupancy_split_k_value + # 28: rank_best_occupancy_split_k + # 29: strategy (BestOccupancy) + # 30: total number of candidate values + # This layout repeats in blocks of size=12, i.e., the next 12 elements (columns 31-42) have the same structure if they are not null. + # Collect these elements into a dictionary + # where each key is the profiler_command and the value is a list of tuples containing the values for each block. 
+ solvers_per_conv_shape = defaultdict(list) + offset = 18 + size = 12 + for i in range(len(profiler_commands)): + profiler_command = profiler_commands.iloc[i] + #print(f"Processing profiler command: {profiler_command}, row: {i}") + if pd.isna(profiler_command): + continue + if profiler_command not in solvers_per_conv_shape: + solvers_per_conv_shape[profiler_command] = [] + for j in range(0, len(df.columns) - size - offset, size): + op_name = df.iloc[i, offset + j + 1] + if pd.isna(op_name): + continue + + try: + loc_fixed_split_k_time = float(df.iloc[i, offset + j + 2]) + loc_fixed_split_k_tflops = float(df.iloc[i, offset + j + 3]) + loc_fixed_split_k_value = int(df.iloc[i, offset + j + 4]) + loc_rank_fixed_split_k = int(df.iloc[i, offset + j + 5]) + loc_strategy_fixed_split_k = df.iloc[i, offset + j + 6] + loc_best_occupancy_split_k_time = float(df.iloc[i, offset + j + 7]) + loc_best_occupancy_split_k_tflops = float(df.iloc[i, offset + j + 8]) + loc_best_occupancy_split_k_value = int(df.iloc[i, offset + j + 9]) + loc_rank_best_occupancy_split_k = int(df.iloc[i, offset + j + 10]) + loc_strategy_best_occupancy_split_k = df.iloc[i, offset + j + 11] + loc_num_candidates = int(df.iloc[i, offset + j + 12]) + + assert loc_strategy_fixed_split_k == "SplitKStrategy::FixedSplitK", \ + f"Expected strategy_fixed_split_k to be 'SplitKStrategy::FixedSplitK', got {loc_strategy_fixed_split_k}." + assert loc_strategy_best_occupancy_split_k == "SplitKStrategy::BestOccupancy", \ + f"Expected strategy_best_occupancy_split_k to be 'SplitKStrategy::BestOccupancy', got {loc_strategy_best_occupancy_split_k}." + # Candidates: {-1, 1, 2, 4, 8, 16, 32, 64, 128} + # Sometimes a split-K value is incompatible with the V3 pipeline, so we may have fewer than 9 candidates. + assert loc_num_candidates <= 9 and loc_num_candidates > 1, \ + f"Expected num_candidates to be 9, got {loc_num_candidates}." 
+ assert loc_rank_best_occupancy_split_k >= 1 and loc_rank_best_occupancy_split_k <= 9, \ + f"Expected rank_best_occupancy_split_k to be between 1 and 9, got {loc_rank_best_occupancy_split_k}." + + solvers_per_conv_shape[profiler_command].append( + (loc_fixed_split_k_value, loc_fixed_split_k_tflops, loc_best_occupancy_split_k_value, loc_best_occupancy_split_k_tflops, loc_rank_best_occupancy_split_k)) + except (ValueError, TypeError) as e: + print(f"Warning: Could not process row {i}, block {j}: {e}. Skipping this block.") + continue + op_name = fixed_split_k_ops.iloc[0].split("<")[0] suffix = f"_{args.label}" if args.label else "" + plot_perf_for_all_solvers(solvers_per_conv_shape, args.output_dir, suffix, op_name) + G, N, K, C, Y, X, Ho, Wo = get_convolution_shapes(profiler_commands) plot_tSNE_performance(G,N,K,C,Y,X,Ho,Wo, fixed_split_k_tflops.astype(float).values, best_occupancy_split_k_tflops.astype(float).values, args.output_dir, suffix, op_name) diff --git a/script/run_conv_profiler.py b/script/run_conv_profiler.py index ac0d70e6a0..e1678f786a 100644 --- a/script/run_conv_profiler.py +++ b/script/run_conv_profiler.py @@ -45,7 +45,11 @@ def run_ck_profiler_cmd(cmd, disabled_ops, run_id, log_to_stdout=False): subprocess.run(cmd, env=env_vars) else: with open(os.devnull, 'w') as devnull: - subprocess.run(cmd, env=env_vars, stdout=devnull) + timeoutInSec = 300 + try: + subprocess.run(cmd, env=env_vars, stdout=devnull, timeout=timeoutInSec) + except subprocess.TimeoutExpired: + print(f"Command '{cmd_str}' timed out after {timeoutInSec} seconds.", file=sys.stderr) def get_profiler_commands(csv_file): profiler_commands = []