Perf analysis scripts.

2026-06-30 19:57:40 +00:00 · 2025-12-12 10:33:32 -05:00
parent fa19112a68
commit 3361cfd1bf
3 changed files with 116 additions and 13 deletions
--- a/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
+++ b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
@@ -273,11 +273,21 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
            std::stringstream out_ss;
            out_ss << "CK best configuration:" << std::endl
                << "name: " << best_op_name << std::endl
-                << "avg_time: " << best_avg_time << std::endl
-                << "SplitK: " << 1 << std::endl;
+                << "avg_time: " << best_avg_time << std::endl;
            out << out_ss.str();
            out.close();
+            std::cout << "Saved the best configuration to log file: " << log_file << std::endl;
        }
+        else 
+        {
+            std::cerr << "Warning: cannot open log file: " << log_file << std::endl;
+            exit(1);
+        }
+    }
+    else {
+        std::cout << "Note: set environment variable CK_PROFILER_LOG_FILE to save the best "
+                     "configuration to a log file"
+                  << std::endl;
    }
              
    if(instance_index != -1)
--- a/script/benchmark_ck_vs_ck_tile.py
+++ b/script/benchmark_ck_vs_ck_tile.py
@@ -51,11 +51,11 @@ def run_ck_profiler_cmd(cmd_args, profiler_type, bin_path, results_file, log_to_
    env["CK_PROFILER_LOG_FILE"] = results_file
    env["CK_TILE_PROFILER_LOG_FILE"] = results_file

+    timeoutInSec = 300 * 60 # 300 minutes timeout
    if log_to_stdout:
-      subprocess.run(cmd) 
+      subprocess.run(cmd, timeout=timeoutInSec, env=env) 
    else:
      with open(os.devnull, 'w') as devnull:
-        timeoutInSec = 300 * 60 # 300 minutes timeout
        try:
          subprocess.run(cmd, stdout=devnull, stderr=devnull, timeout=timeoutInSec, env=env)
        except subprocess.TimeoutExpired:
@@ -416,19 +416,14 @@ def main():
    if not os.path.exists(args.results_path):
        os.makedirs(args.results_path)

-    results_file = os.path.join(args.results_path, f"ck_results_{args.data_type}_{os.getpid()}.txt")
+    # Get computer host name
+    hostname = os.uname().nodename.split('.')[0]
+
+    results_file = os.path.join(args.results_path, f"ck_results_{hostname}_{args.data_type}_{os.getpid()}.txt")

    data_type_arg = data_type_str_to_profiler_arg(args.data_type)

    for i, cmd in enumerate(profiler_commands):
-        cmd_concatenated_str = ' '.join(cmd)
-        print(f"\n####################################################################################################################")
-        print(f"Running command {i + 1}/{len(profiler_commands)}: {cmd_concatenated_str}")
-        print(f"######################################################################################################################")
-        # with open(results_file, 'a') as f:
-        #   f.write(cmd_concatenated_str + "\n")
-        # run_ck_profiler_cmd(cmd, ProfilerType.CK_TILE, args.bin_path, results_file, args.log_to_stdout)
-
        # Set the correct data type based on user input
        cmd[1] = data_type_arg

@@ -438,6 +433,15 @@ def main():
        # We don't want to run verification. We assume CK already works correctly.
        cmd[3] = '0'  # Set verification flag to 0 (no verification)

+        cmd_concatenated_str = ' '.join(cmd)
+        print(f"\n####################################################################################################################")
+        print(f"Running command {i + 1}/{len(profiler_commands)}: {cmd_concatenated_str}")
+        print(f"######################################################################################################################")
+  
+        # Print the command to the output file
+        with open(results_file, 'a') as f:
+          f.write(cmd_concatenated_str + "\n")
+
        run_ck_profiler_cmd(cmd, ProfilerType.CK, args.bin_path, results_file, args.log_to_stdout)
  
 if __name__ == "__main__":
--- a/script/plot_navi_perf.py
+++ b/script/plot_navi_perf.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+
+import os
+import argparse
+import subprocess
+import sys
+import matplotlib.pyplot as plt
+# Non-interactive backend for matplotlib
+plt.switch_backend('Agg')
+import numpy as np
+
+def parse_cli_args():
+    """Parse and validate the command line arguments of the plotting script.
+
+    Both result files are mandatory because main() opens each of them
+    unconditionally; requiring them here turns a missing option into an
+    argparse usage error instead of a TypeError raised by open(None).
+
+    Returns:
+        argparse.Namespace with the attributes ``input_file_int8`` and
+        ``input_file_fp16`` (paths given on the command line).
+
+    Exits:
+        With status 1 when unrecognized arguments are present, and through
+        argparse's own usage error when a required option is missing.
+    """
+    parser = argparse.ArgumentParser(
+        description="Plot the int8 vs fp16 speedup from CK profiler result files."
+    )
+    parser.add_argument("--input-file-int8", type=str, dest="input_file_int8", required=True, help="Path to the file containing test results for int8.")
+    parser.add_argument("--input-file-fp16", type=str, dest="input_file_fp16", required=True, help="Path to the file containing test results for fp16.")
+
+    # parse_known_args() is kept so that unknown options are reported with
+    # the script's own message and exit status.
+    args, unknown_args = parser.parse_known_args()
+
+    if unknown_args:
+        print(f"Unknown arguments: {unknown_args}", file=sys.stderr)
+        sys.exit(1)
+
+    return args
+
+def parse_times(input_file):
+    """Read a profiler results file into a ``{command: avg_time}`` dict.
+
+    A record consists of the command line followed, three lines later, by a
+    line containing "avg_time: <value>". Records are located by searching for
+    the "avg_time: " line rather than by assuming that every record occupies
+    exactly four lines, so a command that has no result lines after it does
+    not shift or break the parsing of the records that follow.
+
+    Args:
+        input_file: Path of the results file to read.
+
+    Returns:
+        Dict mapping each stripped command line to its average time (float).
+        When a command occurs more than once, the last result wins.
+
+    Raises:
+        ValueError: If the text after "avg_time: " is not a valid float.
+    """
+    time_marker = "avg_time: "
+    # Distance, in lines, from an avg_time line back to its command line.
+    cmd_offset = 3
+
+    with open(input_file, 'r') as f:
+        lines = [line.strip() for line in f]
+
+    cmd_time_dict = {}
+    for idx, line in enumerate(lines):
+        _, found, value = line.rpartition(time_marker)
+        if not found or idx < cmd_offset:
+            # Not a timing line, or too close to the top to have a command.
+            continue
+        try:
+            avg_time = float(value)
+        except ValueError as err:
+            raise ValueError(
+                f"{input_file}:{idx + 1}: cannot parse average time from {line!r}"
+            ) from err
+        cmd_time_dict[lines[idx - cmd_offset]] = avg_time
+
+    return cmd_time_dict
+
+def _drop_data_type(cmd):
+    """Return the command string without its second whitespace-separated token.
+
+    NOTE(review): assumes the second token is the data type argument, as in
+    the command lines the benchmark script writes to the results file --
+    confirm against the producer of the input files.
+    """
+    tokens = cmd.split()
+    return ' '.join(tokens[:1] + tokens[2:])
+
+def plot_perf(times_int8, times_fp16, output_file):
+    """Plot the per-command time saving of int8 relative to fp16.
+
+    For every int8 command the matching fp16 time is looked up, first by the
+    exact command string and otherwise by the command string with the data
+    type token ignored. Without the second lookup the two result sets never
+    share a key when the command string itself contains the data type, and
+    the plot stays empty.
+
+    The plotted value is ``(time_fp16 - time_int8) / time_fp16 * 100``.
+    Commands without an fp16 counterpart, or whose fp16 time is not
+    positive, are left out.
+
+    Args:
+        times_int8: Dict mapping command string to int8 average time.
+        times_fp16: Dict mapping command string to fp16 average time.
+        output_file: Path the figure is saved to.
+    """
+    # fp16 results indexed by the data-type-independent form of the command.
+    fp16_by_problem = {_drop_data_type(cmd): t for cmd, t in times_fp16.items()}
+
+    speedup_percentage = []
+    for cmd in times_int8:
+        print(cmd)
+        print(f"Times int8: {times_int8[cmd]}")
+        time_fp16 = times_fp16.get(cmd)
+        if time_fp16 is None:
+            time_fp16 = fp16_by_problem.get(_drop_data_type(cmd))
+        if time_fp16 is None:
+            continue
+        time_int8 = times_int8[cmd]
+        print(f"int8 time: {time_int8}, fp16 time: {time_fp16}")
+        if time_fp16 > 0:
+            speedup = (time_fp16 - time_int8) / time_fp16 * 100
+            speedup_percentage.append(speedup)
+
+    if not speedup_percentage:
+        print("Warning: no command has both an int8 and an fp16 result; "
+              "the plot will be empty.", file=sys.stderr)
+
+    n_samples = len(speedup_percentage)
+    x = np.arange(n_samples)
+    plt.figure(figsize=(10, 6))
+    plt.plot(x, speedup_percentage, marker='o')
+    plt.title('Speedup of int8 over fp16')
+    plt.xlabel('Sample Index')
+    plt.ylabel('Speedup (%)')
+    plt.grid(True)
+    plt.savefig(output_file)
+    plt.close()
+
+def main():
+    """Load both result files, report their sizes and write the speedup plot."""
+    cli = parse_cli_args()
+
+    int8_times = parse_times(cli.input_file_int8)
+    fp16_times = parse_times(cli.input_file_fp16)
+    print(f"Got {len(int8_times)} int8 samples and {len(fp16_times)} fp16 samples.")
+
+    # The figure is written into the current working directory.
+    plot_path = os.path.join(os.getcwd(), "navi_perf_int8_vs_fp16.png")
+    plot_perf(int8_times, fp16_times, plot_path)
+    print(f"Performance plot saved to {plot_path}")
+  
+# Script entry point: run main() only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()