Analysis script improvements.

2026-07-01 04:07:56 +00:00 · 2025-07-08 13:06:30 +00:00
parent 1b8a5e815f
commit bac43239ff
2 changed files with 208 additions and 59 deletions
--- a/script/analyze_conv_tests.py
+++ b/script/analyze_conv_tests.py
@@ -535,17 +535,17 @@ def plot_performance(fixed_split_k_tflops, best_occupancy_split_k_tflops, gemm_m
    
    perf = (best_occupancy_split_k_tflops / fixed_split_k_tflops) * 100.0

-    x_values = np.log(gemm_k_arr) 
-    y_values = np.log(gemm_m_arr * gemm_n_arr)
+    x_values = np.log10(gemm_k_arr) 
+    y_values = np.log10(gemm_m_arr * gemm_n_arr)

    # Heat map with axis gemm_m * gemm_n and gemm_k
    scatter = plt.scatter(x_values, y_values, 
                c=perf,
-                cmap='coolwarm',
+                cmap='bwr',
                edgecolor='black',
                alpha=0.7,
                s=40,  # Size of the points
-                norm=plt.Normalize(vmin=50, vmax=150))  # Normalize colors: blue (<100%), red (>100%)
+                norm=plt.Normalize(vmin=0, vmax=200))  # Normalize colors: blue (<100%), red (>100%)
    
    title = op_name if op_name else 'Performance of Best Occupancy Split-K vs Fixed Split-K'
    title_size = 14 if op_name else 16
@@ -566,11 +566,11 @@ def plot_performance(fixed_split_k_tflops, best_occupancy_split_k_tflops, gemm_m
    plt.figure(figsize=(12, 8))
    scatter = plt.scatter(x_values, y_values,
                c=perf,
-                cmap='coolwarm',
+                cmap='bwr',
                edgecolor='black',
                alpha=0.7,
                s=40,  # Size of the points
-                norm=plt.Normalize(vmin=50, vmax=150))  # Normalize colors: blue (<100%), red (>100%)
+                norm=plt.Normalize(vmin=0, vmax=200))  # Normalize colors: blue (<100%), red (>100%)
    plt.colorbar(label='Performance (%)')
    plt.title(title, fontsize=title_size)
    plt.xlabel('log(K)', fontsize=14)
@@ -578,58 +578,142 @@ def plot_performance(fixed_split_k_tflops, best_occupancy_split_k_tflops, gemm_m
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

+    fp16_ridge_point = np.log10(1307.4 / 5.3)
+    fp32_ridge_point = np.log10(653.7 / 5.3) 
+    plt.axhline(y=fp16_ridge_point, color='green', linestyle='--', label='FP16/BF16 Ridge Point')
+    plt.axhline(y=fp32_ridge_point, color='black', linestyle='--', label='FP32 Ridge Point')
+
    file_name = os.path.join(output_dir, f'performance_heatmap_k_ai{suffix}.png')
    plt.savefig(file_name, dpi=150)
    print(f"Saved performance heatmap to: {file_name}")

-
-def main():
-    args = parse_cli_args()
-
-    csv.register_dialect('PipeDialect', delimiter=';')
-    with open(args.csv_file) as csvfile:
-        data = [row for row in csv.reader(csvfile, 'PipeDialect')]
-
-    df = pd.DataFrame(data = data)
-
-    print(f"Loaded {len(df)} rows.")
-    print(df.head())
-
-    if args.old_format:
-        fixed_split_k_ops = df[0]
-        fixed_split_k_times = df[1]
-        fixed_split_k_values = df[2]
-        best_occupancy_split_k_ops = df[3]
-        best_occupancy_split_k_times = df[4]
-        best_occupancy_split_k_values = df[5]
-    else:
-        valid_mask1 = df[10] == "SplitKStrategy::FixedSplitK"
-        valid_mask2 = df[16] == "SplitKStrategy::BestOccupancy"
-        valid_mask = valid_mask1 & valid_mask2
-
-        gemm_m = df[0][valid_mask]
-        gemm_n = df[1][valid_mask]
-        gemm_k = df[2][valid_mask]
-        arithmetic_intensity = df[3][valid_mask]
-        data_type = df[4][valid_mask]
-
-        fixed_split_k_ops = df[5][valid_mask]
-        fixed_split_k_times = df[6][valid_mask]
-        fixed_split_k_tflops = df[7][valid_mask]
-        fixed_split_k_values = df[8][valid_mask]
-        # 9 - rank
-        # 10 - strategy
+def plot_split_k_value_comparison(fixed_split_k_values, best_occupancy_split_k_values, gemm_k, arithmetic_intensity, output_dir, suffix, op_name):
+    """Scatter-plot the fixed / best-occupancy split-K ratio over log10(K) vs log10(arithmetic intensity) and save it as a PNG."""
+    plt.figure(figsize=(12, 8))
    
-        best_occupancy_split_k_ops = df[11][valid_mask]
-        best_occupancy_split_k_times = df[12][valid_mask]
-        best_occupancy_split_k_tflops = df[13][valid_mask]
-        best_occupancy_split_k_values = df[14][valid_mask]
-        # 15 - rank
-        # 16 - strategy
-        # 17 - total number of candidate ops.
+    # .astype(float).values turns each incoming Series into a plain float array
+    fixed_split_k_values = fixed_split_k_values.astype(float).values
+    best_occupancy_split_k_values = best_occupancy_split_k_values.astype(float).values
+    gemm_k_arr = gemm_k.astype(float).values
+    ai_arr = arithmetic_intensity.astype(float).values
+    # Color value: above 1 where the fixed strategy chose the larger split-K
+    ratio = (fixed_split_k_values / best_occupancy_split_k_values)

-    suffix = f"_{args.label}" if args.label else ""
+    x_values = np.log10(gemm_k_arr)  # log10, the same base as the ridge points below
+    y_values = np.log10(ai_arr)

+    # Heat map with axis gemm_k and arithmetic intensity
+    scatter = plt.scatter(x_values, y_values, 
+                c=ratio,
+                cmap='viridis',
+                edgecolor='black',
+                alpha=0.7,
+                s=40,  # Size of the points
+                norm=plt.Normalize(vmin=0.0, vmax=2.0))
+    # NOTE(review): ridge constants look like peak TFLOPS / bandwidth in TB/s -- confirm the hardware numbers
+    fp16_ridge_point = np.log10(1307.4 / 5.3)
+    fp32_ridge_point = np.log10(653.7 / 5.3) 
+    plt.axhline(y=fp16_ridge_point, color='green', linestyle='--', label='FP16/BF16 Ridge Point')
+    plt.axhline(y=fp32_ridge_point, color='black', linestyle='--', label='FP32 Ridge Point')
+    
+    title = op_name if op_name else 'Comparison of Fixed Split-K vs Best Occupancy Split-K'
+    title_size = 14 if op_name else 16
+
+    plt.colorbar(label='best fixed split-K / best occupancy split-K')
+    plt.title(title, fontsize=title_size)
+    plt.xlabel('log(K)', fontsize=14)
+    plt.ylabel('log(Arithmetic Intensity)', fontsize=14)
+    plt.grid(True, linestyle='--', alpha=0.7)
+    plt.tight_layout()
+
+    file_name = os.path.join(output_dir, f'split_k_value_comparison{suffix}.png')
+    plt.savefig(file_name, dpi=150)
+    print(f"Saved split-k value comparison heatmap to: {file_name}")
+
+def get_convolution_shapes(profiler_commands):
+    """Extract 2D convolution shapes from profiler command strings.
+
+    Each command is split on whitespace and read by position: tokens 9-12 are
+    G, N, K, C, followed by two tokens each for filter size (Y, X), input size
+    (Hi, Wi), strides, dilations, left padding and right padding.
+    NOTE(review): this layout is assumed to follow the ckProfiler 2D grouped
+    convolution argument order -- confirm against the tool that writes the CSV.
+
+    Returns a tuple of eight lists (G, N, K, C, Y, X, Ho, Wo), one entry per
+    command, where Ho and Wo are the computed output height and width.
+    """
+    G, N, K, C, Y, X, Ho, Wo = [], [], [], [], [], [], [], []
+
+    for command in profiler_commands:
+        parts = command.split()
+        g, n, k, c = int(parts[9]), int(parts[10]), int(parts[11]), int(parts[12])
+        # X has its own token right after Y, so every later field sits one
+        # index further along than if the filter size were a single token.
+        y, x = int(parts[13]), int(parts[14])
+        hi, wi = int(parts[15]), int(parts[16])
+        sy, sx = int(parts[17]), int(parts[18])
+        dy, dx = int(parts[19]), int(parts[20])
+        left_py, left_px = int(parts[21]), int(parts[22])
+        right_py, right_px = int(parts[23]), int(parts[24])
+
+        # Filter extent once dilation is applied.
+        effective_y = dy * (y - 1) + 1
+        effective_x = dx * (x - 1) + 1
+        ho = (hi + left_py + right_py - effective_y) // sy + 1
+        wo = (wi + left_px + right_px - effective_x) // sx + 1
+
+        G.append(g)
+        N.append(n)
+        K.append(k)
+        C.append(c)
+        Y.append(y)
+        X.append(x)
+        Ho.append(ho)
+        Wo.append(wo)
+
+    return G, N, K, C, Y, X, Ho, Wo
+
+def plot_tSNE_performance(G, N, K, C, Y, X, Ho, Wo, fixed_split_k_tflops, best_occupancy_split_k_tflops, output_dir, suffix="", op_name=""):
+    """Save a 2-D t-SNE embedding of the convolution shapes, colored by relative performance.
+
+    The eight shape sequences (G, N, K, C, Y, X, Ho, Wo) form the feature columns, one
+    row per shape. Points are colored by best-occupancy TFLOPS as a percentage of fixed
+    split-K TFLOPS on a 0-200 scale; the figure is saved under `output_dir` and closed.
+    """
+    from sklearn.manifold import TSNE  # local import: only this plot needs scikit-learn
+
+    data = np.array([G, N, K, C, Y, X, Ho, Wo]).T
+    # scikit-learn requires perplexity < n_samples; its default of 30 would
+    # raise on small result sets, so clamp it to the number of rows.
+    perplexity = min(30.0, max(len(data) - 1, 1))
+    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
+    tsne_results = tsne.fit_transform(data)
+
+    perf = (best_occupancy_split_k_tflops / fixed_split_k_tflops) * 100.0
+    plt.figure(figsize=(12, 8))
+    scatter = plt.scatter(
+        tsne_results[:, 0],
+        tsne_results[:, 1],
+        c=perf,
+        cmap='bwr',
+        edgecolor='black',
+        alpha=0.7,
+        s=30,
+        norm=plt.Normalize(vmin=0, vmax=200))
+    plt.colorbar(scatter, label='Performance (%)')
+
+    title = op_name if op_name else 't-SNE Performance of Fixed Split-K vs Best Occupancy Split-K'
+    plt.title(title, fontsize=14 if op_name else 16)
+    plt.xlabel('t-SNE Component 1', fontsize=14)
+    plt.ylabel('t-SNE Component 2', fontsize=14)
+    plt.grid(True, linestyle='--', alpha=0.7)
+
+    file_name = os.path.join(output_dir, f'tSNE_performance{suffix}.png')
+    plt.savefig(file_name, dpi=150)
+    print(f"Saved t-SNE performance chart to: {file_name}")
+    plt.close()
+
+def get_statistics(fixed_split_k_values, fixed_split_k_times, fixed_split_k_ops, best_occupancy_split_k_values, best_occupancy_split_k_times, best_occupancy_split_k_ops):
    # Find indices where split-k is not in the standard set
    standard_split_k = ['1', '2', '4', '8', '16', '32', '64', '128']
    non_standard_indices = [i for i in range(len(best_occupancy_split_k_values)) 
@@ -685,23 +769,87 @@ def main():
            elif best_occ_split_k_time < tol and fixed_split_k_time < tol:
                print(f"WARNING: Both optimized and non-optimized times are too small for row {i}, skipping this. Split-K (opt): {best_occ_split_k_value}, Split-K (stardard): {fixed_split_k_value}")

+    return perf_change, fixed_split_k_counts, fixed_equal_best_occupancy_counts, best_occupancy_split_k_count, non_standard_indices
+
+def main():
+    """Parse the results CSV named on the command line and write all analysis plots."""
+    args = parse_cli_args()
+    csv.register_dialect('PipeDialect', delimiter=';')
+    with open(args.csv_file) as csvfile:
+        data = [row for row in csv.reader(csvfile, 'PipeDialect')]
+
+    df = pd.DataFrame(data = data)
+
+    print(f"Loaded {len(df)} rows.")
+    print(df.head())
+    # NOTE(review): the old-format branch does not set profiler_commands, gemm_* or *_tflops, which the calls below use.
+    if args.old_format:
+        fixed_split_k_ops = df[0]
+        fixed_split_k_times = df[1]
+        fixed_split_k_values = df[2]
+        best_occupancy_split_k_ops = df[3]
+        best_occupancy_split_k_times = df[4]
+        best_occupancy_split_k_values = df[5]
+    else:
+        # The dataframe may contain rows that have only one column.
+        # These are the shapes where no instance of the solver was applicable.
+        # Separate these into a separate dataframe.
+        non_null_counts = df.count(axis=1)
+        no_applicable_op_found = df[non_null_counts == 1].copy()
+        df = df[non_null_counts > 1].copy()
+
+        valid_mask1 = df[11] == "SplitKStrategy::FixedSplitK"
+        valid_mask2 = df[17] == "SplitKStrategy::BestOccupancy"
+        valid_mask = valid_mask1 & valid_mask2
+
+        profiler_commands = df[0][valid_mask]
+        gemm_m = df[1][valid_mask]
+        gemm_n = df[2][valid_mask]
+        gemm_k = df[3][valid_mask]
+        arithmetic_intensity = df[4][valid_mask]
+        data_type = df[5][valid_mask]
+
+        fixed_split_k_ops = df[6][valid_mask]
+        fixed_split_k_times = df[7][valid_mask]
+        fixed_split_k_tflops = df[8][valid_mask]
+        fixed_split_k_values = df[9][valid_mask]
+        # 10 - rank
+        # 11 - strategy
+    
+        best_occupancy_split_k_ops = df[12][valid_mask]
+        best_occupancy_split_k_times = df[13][valid_mask]
+        best_occupancy_split_k_tflops = df[14][valid_mask]
+        best_occupancy_split_k_values = df[15][valid_mask]
+        # 16 - rank
+        # 17 - strategy
+        # 18 - total number of candidate ops.
+
    op_name = fixed_split_k_ops.iloc[0].split("<")[0]
+    suffix = f"_{args.label}" if args.label else ""
+
+    G, N, K, C, Y, X, Ho, Wo = get_convolution_shapes(profiler_commands)
+    plot_tSNE_performance(G,N,K,C,Y,X,Ho,Wo, fixed_split_k_tflops.astype(float).values, best_occupancy_split_k_tflops.astype(float).values, args.output_dir, suffix, op_name)
+
+    perf_change, fixed_split_k_counts, fixed_equal_best_occupancy_counts, best_occupancy_split_k_count, non_standard_indices = get_statistics(
+        fixed_split_k_values, fixed_split_k_times, fixed_split_k_ops,
+        best_occupancy_split_k_values, best_occupancy_split_k_times, best_occupancy_split_k_ops)
+
    plot_perf(perf_change, args.output_dir, suffix, op_name)

    plot_best_split_k_values(
        fixed_split_k_counts, best_occupancy_split_k_count, 
        fixed_equal_best_occupancy_counts, suffix, args)

-    # If optimized count is non-zero, show the distribution of optimized values
-    if best_occupancy_split_k_count > 0:
-        non_standard_values = [best_occupancy_split_k_values.iloc[i] for i in non_standard_indices]
-        non_standard_counts = {}
-        for val in non_standard_values:
-            non_standard_counts[val] = non_standard_counts.get(val, 0) + 1
-        
-        plot_split_k_distribution(non_standard_counts, best_occupancy_split_k_count, args, suffix)
+    non_standard_values = [best_occupancy_split_k_values.iloc[i] for i in non_standard_indices]
+    non_standard_counts = {}
+    for val in non_standard_values:
+        non_standard_counts[val] = non_standard_counts.get(val, 0) + 1
+    
+    plot_split_k_distribution(non_standard_counts, best_occupancy_split_k_count, args, suffix)

    plot_performance(fixed_split_k_tflops, best_occupancy_split_k_tflops, gemm_m, gemm_n, gemm_k, arithmetic_intensity, args.output_dir, suffix, op_name)

+    plot_split_k_value_comparison(fixed_split_k_values, best_occupancy_split_k_values, gemm_k, arithmetic_intensity, args.output_dir, suffix, op_name)
+
 if __name__ == "__main__":
    main()
--- a/script/run_conv_profiler.py
+++ b/script/run_conv_profiler.py
@@ -51,6 +51,7 @@ def get_profiler_commands(csv_file):
  profiler_commands = []
  with open(csv_file, 'r') as f:
    lines = f.readlines()
+    lines = lines[1:]  # Skip the header line
    lines = list(dict.fromkeys(lines))
    for line in lines:
        line = line.strip()