Log the best grouped conv fwd configuration and plot int8 vs. fp16 results.

Notes on this patch:
  * profile_grouped_conv_fwd_impl.hpp: the log record no longer contains the
    constant "SplitK: 1" line, the profiler reports where the record was saved,
    and it exits with an error when the log file cannot be opened.
  * benchmark_ck_vs_ck_tile.py: the timeout and the environment are passed to
    the profiler in both logging modes, the host name is part of the results
    file name, and every command is written to the results file after its
    data type and verification arguments are final.
  * plot_navi_perf.py (new): reads one results file per data type and plots the
    relative time saved by int8 over fp16 for the problems found in both files.

diff --git a/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
index 61f2df2158..69ef4a9958 100644
--- a/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
+++ b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
@@ -273,11 +273,23 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
             std::stringstream out_ss;
             out_ss << "CK best configuration:" << std::endl
                    << "name: " << best_op_name << std::endl
-                   << "avg_time: " << best_avg_time << std::endl
-                   << "SplitK: " << 1 << std::endl;
+                   << "avg_time: " << best_avg_time << std::endl;
             out << out_ss.str();
             out.close();
+            std::cout << "Saved the best configuration to log file: " << log_file << std::endl;
         }
+        else
+        {
+            // The benchmark scripts parse this log, so failing to write it is fatal.
+            std::cerr << "Error: cannot open log file: " << log_file << std::endl;
+            exit(1);
+        }
+    }
+    else
+    {
+        std::cout << "Note: set environment variable CK_PROFILER_LOG_FILE to save the best "
+                     "configuration to a log file"
+                  << std::endl;
     }
 
     if(instance_index != -1)
diff --git a/script/benchmark_ck_vs_ck_tile.py b/script/benchmark_ck_vs_ck_tile.py
index 8c8f20de22..c5f1b0746f 100755
--- a/script/benchmark_ck_vs_ck_tile.py
+++ b/script/benchmark_ck_vs_ck_tile.py
@@ -51,11 +51,11 @@ def run_ck_profiler_cmd(cmd_args, profiler_type, bin_path, results_file, log_to_
     env["CK_PROFILER_LOG_FILE"] = results_file
     env["CK_TILE_PROFILER_LOG_FILE"] = results_file
 
+    timeoutInSec = 300 * 60 # 300 minutes timeout
     if log_to_stdout:
-        subprocess.run(cmd)
+        subprocess.run(cmd, timeout=timeoutInSec, env=env)
     else:
         with open(os.devnull, 'w') as devnull:
-            timeoutInSec = 300 * 60 # 300 minutes timeout
             try:
                 subprocess.run(cmd, stdout=devnull, stderr=devnull, timeout=timeoutInSec, env=env)
             except subprocess.TimeoutExpired:
@@ -416,19 +416,14 @@ def main():
     if not os.path.exists(args.results_path):
         os.makedirs(args.results_path)
 
-    results_file = os.path.join(args.results_path, f"ck_results_{args.data_type}_{os.getpid()}.txt")
+    # Get computer host name
+    hostname = os.uname().nodename.split('.')[0]
+
+    results_file = os.path.join(args.results_path, f"ck_results_{hostname}_{args.data_type}_{os.getpid()}.txt")
 
     data_type_arg = data_type_str_to_profiler_arg(args.data_type)
 
     for i, cmd in enumerate(profiler_commands):
-        cmd_concatenated_str = ' '.join(cmd)
-        print(f"\n####################################################################################################################")
-        print(f"Running command {i + 1}/{len(profiler_commands)}: {cmd_concatenated_str}")
-        print(f"######################################################################################################################")
-        # with open(results_file, 'a') as f:
-        #     f.write(cmd_concatenated_str + "\n")
-        # run_ck_profiler_cmd(cmd, ProfilerType.CK_TILE, args.bin_path, results_file, args.log_to_stdout)
-
         # Set the correct data type based on user input
         cmd[1] = data_type_arg
 
@@ -438,6 +433,15 @@ def main():
         # We don't want to run verification. We assume CK already works correctly.
         cmd[3] = '0' # Set verification flag to 0 (no verification)
 
+        cmd_concatenated_str = ' '.join(cmd)
+        print(f"\n####################################################################################################################")
+        print(f"Running command {i + 1}/{len(profiler_commands)}: {cmd_concatenated_str}")
+        print(f"######################################################################################################################")
+
+        # Print the command to the output file
+        with open(results_file, 'a') as f:
+            f.write(cmd_concatenated_str + "\n")
+
         run_ck_profiler_cmd(cmd, ProfilerType.CK, args.bin_path, results_file, args.log_to_stdout)
 
 if __name__ == "__main__":
diff --git a/script/plot_navi_perf.py b/script/plot_navi_perf.py
new file mode 100755
index 0000000000..7f8df75935
--- /dev/null
+++ b/script/plot_navi_perf.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+
+import os
+import argparse
+import subprocess
+import sys
+import matplotlib.pyplot as plt
+# Non-interactive backend for matplotlib
+plt.switch_backend('Agg')
+import numpy as np
+
+# Lines the profiler writes to the log file; any other non-empty line is a command
+PROFILER_LOG_PREFIXES = ("CK best configuration:", "name:", "avg_time:", "SplitK:")
+
+def parse_cli_args():
+    """Parse command line arguments"""
+    parser = argparse.ArgumentParser(description="Plot the int8 speedup over fp16 from CK profiler result files.")
+    parser.add_argument("--input-file-int8", type=str, dest="input_file_int8", required=True, help="Path to the file containing test results for int8.")
+    parser.add_argument("--input-file-fp16", type=str, dest="input_file_fp16", required=True, help="Path to the file containing test results for fp16.")
+
+    args, unknown_args = parser.parse_known_args()
+
+    if unknown_args:
+        print(f"Unknown arguments: {unknown_args}", file=sys.stderr)
+        sys.exit(1)
+
+    return args
+
+def parse_times(input_file):
+    """Map each command in a results file to the avg_time reported for it.
+
+    Records are matched by content instead of by line position, so a command
+    without profiler output (e.g. a failed or timed out run) is skipped instead
+    of shifting all the records after it.
+    """
+    cmd_time_dict = {}
+    current_cmd = None
+    with open(input_file, 'r') as f:
+        for raw_line in f:
+            line = raw_line.strip()
+            if not line:
+                continue
+            if not line.startswith(PROFILER_LOG_PREFIXES):
+                current_cmd = line
+            elif line.startswith("avg_time:") and current_cmd is not None:
+                cmd_time_dict[current_cmd] = float(line.split("avg_time:", 1)[1])
+                current_cmd = None
+
+    return cmd_time_dict
+
+def problem_key(cmd):
+    """Return the command without its data type argument.
+
+    benchmark_ck_vs_ck_tile.py writes the data type as the second argument, so
+    the int8 and fp16 runs of the same problem differ in that argument.
+    """
+    tokens = cmd.split()
+    return ' '.join(tokens[:1] + tokens[2:])
+
+def plot_perf(times_int8, times_fp16, output_file):
+    """Plot the int8 speedup over fp16 for the problems present in both results."""
+    fp16_by_problem = {problem_key(cmd): time for cmd, time in times_fp16.items()}
+
+    speedup_percentage = []
+    for cmd, time_int8 in times_int8.items():
+        time_fp16 = fp16_by_problem.get(problem_key(cmd))
+        if time_fp16 is None:
+            continue
+        print(cmd)
+        print(f"int8 time: {time_int8}, fp16 time: {time_fp16}")
+        if time_fp16 > 0:
+            speedup = (time_fp16 - time_int8) / time_fp16 * 100
+            speedup_percentage.append(speedup)
+
+    n_samples = len(speedup_percentage)
+    if n_samples == 0:
+        print("Warning: no problem is present in both result files, the plot will be empty.", file=sys.stderr)
+
+    x = np.arange(n_samples)
+    plt.figure(figsize=(10, 6))
+    plt.plot(x, speedup_percentage, marker='o')
+    plt.title('Speedup of int8 over fp16')
+    plt.xlabel('Sample Index')
+    plt.ylabel('Speedup (%)')
+    plt.grid(True)
+    plt.savefig(output_file)
+    plt.close()
+
+def main():
+    """Parse both result files and save the speedup plot in the current directory."""
+    args = parse_cli_args()
+
+    times_int8 = parse_times(args.input_file_int8)
+    times_fp16 = parse_times(args.input_file_fp16)
+
+    print(f"Got {len(times_int8)} int8 samples and {len(times_fp16)} fp16 samples.")
+
+    output_plot_file = "navi_perf_int8_vs_fp16.png"
+    output_path = os.path.join(os.getcwd(), output_plot_file)
+    plot_perf(times_int8, times_fp16, output_path)
+    print(f"Performance plot saved to {output_path}")
+
+if __name__ == "__main__":
+    main()