Improve benchmarking scripts.

This commit is contained in:
Ville Pietilä
2026-02-09 04:34:27 -05:00
parent 53201d2081
commit 60d1ec34a9
2 changed files with 127 additions and 5 deletions

View File

@@ -0,0 +1,118 @@
#!/usr/bin/env python3
import subprocess
import sys
from bs4 import BeautifulSoup
import re
import json
def get_mangled_kernel_names_from_html(html_file):
    """Extract kernel names from the data array embedded in an HTML file.

    Every ``<script>`` tag whose text mentions both ``tensor_operation`` and
    ``device`` is searched for a JavaScript ``var data = [...];`` assignment.
    The array literal is parsed as JSON and the ``name`` field of each entry
    is collected.

    Args:
        html_file: Path of the HTML file to scan.

    Returns:
        The list of names from the first script tag that parses successfully,
        or an empty list when no script tag yields usable data.
    """
    # Decode explicitly instead of relying on the locale default; undecodable
    # bytes are replaced so a stray character cannot abort the whole scan.
    with open(html_file, 'r', encoding='utf-8', errors='replace') as f:
        html_content = f.read()

    # Parse with BeautifulSoup to find the right script tag
    soup = BeautifulSoup(html_content, 'html.parser')
    for i, script in enumerate(soup.find_all('script')):
        script_text = script.string
        if not script_text:
            continue
        # Look for scripts containing our kernel signature strings
        if 'tensor_operation' not in script_text or 'device' not in script_text:
            continue
        print(f"Found kernel data in script tag {i}")

        # Now extract the data array from this specific script
        match = re.search(r'var\s+data\s*=\s*(\[.*?\]);', script_text, re.DOTALL)
        if not match:
            continue

        try:
            data = json.loads(match.group(1))
            kernel_names = [item['name'] for item in data]
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
            continue  # try the next script tag
        except (KeyError, TypeError) as e:
            # Valid JSON, but not a list of objects carrying a 'name' key.
            print(f"Unexpected kernel data layout: {e!r}")
            continue  # try the next script tag

        print(f"Found {len(kernel_names)} kernel names:")
        for name in kernel_names:
            print(f" {name}")
        return kernel_names

    print("Could not find kernel data in any script tag")
    return []
def de_mangle_name(mangled, demangler='/opt/rocm/llvm/bin/llvm-cxxfilt'):
    """Demangle a symbol name by piping it through an external demangler.

    Args:
        mangled: The mangled symbol name, written to the tool's stdin.
        demangler: Path of the demangler executable. Defaults to the
            ``llvm-cxxfilt`` location used by this script.

    Returns:
        The demangled name, or ``None`` when the tool cannot be run, exits
        with a non-zero status, prints nothing, or echoes the input back
        unchanged.
    """
    try:
        result = subprocess.run(
            [demangler],
            input=mangled,
            text=True,
            capture_output=True,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError covers a missing binary (FileNotFoundError) as well as a
        # path that exists but cannot be executed (PermissionError).
        print(f"Error using {demangler}: {e}")
        return None

    demangled = result.stdout.strip()
    # Output identical to the input means the tool did not recognise the name.
    if demangled and demangled != mangled:
        return demangled
    return None
def extract_instance_name(demangled, prefix, prefix_instance):
    """Extract a balanced template instance from the start of a demangled name.

    *demangled* must begin with ``prefix + prefix_instance``. The text after
    that is scanned while tracking ``<``/``>`` nesting until the bracket
    opened at the end of *prefix_instance* is closed.

    Args:
        demangled: Demangled symbol name; ``None`` or an empty string is
            treated as "no match".
        prefix: Leading text that precedes the instance.
        prefix_instance: Start of the instance itself, up to and including
            its opening ``<``.

    Returns:
        ``prefix_instance`` followed by everything up to and including the
        matching ``>``, or ``None`` if the name does not start with the
        expected prefix or the brackets never balance.
    """
    full_prefix = prefix + prefix_instance
    # A failed demangle produces None; report "no match" instead of raising.
    if not demangled or not demangled.startswith(full_prefix):
        return None

    start = len(full_prefix)
    # Depth starts at 1 for the '<' that ends prefix_instance.
    depth = 1
    for offset, ch in enumerate(demangled[start:]):
        if ch == '<':
            depth += 1
        elif ch == '>':
            depth -= 1
            if depth == 0:
                # Keep the closing '>' so the returned name is balanced.
                end = start + offset + 1
                return prefix_instance + demangled[start:end]
    return None
def extract_GridwiseGemmMultiD_xdl_cshuffle_v3(demangled):
    """Return the GridwiseGemmMultiD_xdl_cshuffle_v3 instance named in *demangled*.

    Delegates to ``extract_instance_name`` with the
    ``kernel_grouped_conv_fwd_xdl_cshuffle_v3`` wrapper as the outer prefix.
    """
    kernel_wrapper = (
        "void ck::tensor_operation::device::(anonymous namespace)::"
        "kernel_grouped_conv_fwd_xdl_cshuffle_v3<"
    )
    gridwise_gemm = "ck::GridwiseGemmMultiD_xdl_cshuffle_v3<"
    return extract_instance_name(
        demangled,
        prefix=kernel_wrapper,
        prefix_instance=gridwise_gemm,
    )
def extract_GridwiseGemmMultiD_xdl_cshuffle(demangled):
    """Return the GridwiseGemmMultiD_xdl_cshuffle instance named in *demangled*.

    Delegates to ``extract_instance_name`` with the
    ``kernel_grouped_conv_fwd_xdl_cshuffle`` wrapper as the outer prefix.
    """
    kernel_wrapper = (
        "void ck::tensor_operation::device::(anonymous namespace)::"
        "kernel_grouped_conv_fwd_xdl_cshuffle<"
    )
    gridwise_gemm = "ck::GridwiseGemmMultiD_xdl_cshuffle<"
    return extract_instance_name(
        demangled,
        prefix=kernel_wrapper,
        prefix_instance=gridwise_gemm,
    )
if __name__ == "__main__":
    # Command-line entry point: demangle the name given as the first argument
    # and print any grouped-conv-fwd instance names found in it.
    # Alternative usage: get_mangled_kernel_names_from_html(sys.argv[1])
    if len(sys.argv) <= 1:
        print("Please provide a mangled name as an argument.")
    else:
        mangled = sys.argv[1]
        demangled = de_mangle_name(mangled)
        if demangled is None:
            # de_mangle_name returns None on failure; the extractors below
            # need a string, so stop here instead of raising AttributeError.
            print(f"Could not demangle {mangled!r}.")
            sys.exit(1)

        print()
        print("Demangled name:")
        print(demangled)

        v3_instance_name = extract_GridwiseGemmMultiD_xdl_cshuffle_v3(demangled)
        if v3_instance_name:
            print()
            print("Extracted GridwiseGemmMultiD_xdl_cshuffle_v3 instance name:")
            print(v3_instance_name)

        v1_instance_name = extract_GridwiseGemmMultiD_xdl_cshuffle(demangled)
        if v1_instance_name:
            print()
            print("Extracted GridwiseGemmMultiD_xdl_cshuffle instance name:")
            print(v1_instance_name)

View File

@@ -5,12 +5,16 @@ import subprocess
import sys
import argparse
# Run under rocprof-compute
# HIP_VISIBLE_DEVICES=7 rocprof-compute profile -n grouped_conv_fwd --roofline-data-type FP16 -- ./run-best-instances.py --profiler-path ../build-improved-convs/bin/ckProfiler
# Set no verify, and timing of the kernel. Multiple calls of the same kernel inside the script will confuse the profiler.
# Each entry is one string of whitespace-separated arguments.
# NOTE(review): presumably these are the argument strings passed to ckProfiler
# for each run — confirm against the code that consumes this list.
# NOTE(review): the first and second group of entries differ only in the
# seventh field (1 vs 0); this looks like the removed and added lines of a
# diff shown together — confirm which set is the current one.
profiler_commands = [
"1 1 1 0 1 0 1 2 32 32 4 4 3 3 200 200 1 1 1 1 1 1 1 1",
"1 1 1 0 1 0 1 2 32 32 8 8 3 3 200 200 2 2 1 1 1 1 1 1",
#"1 1 1 0 1 0 1 2 32 32 8 8 3 3 100 100 1 2 1 1 1 1 1 1",
"1 1 1 0 1 0 1 2 1 32 2376 256 3 3 100 100 1 1 1 1 1 1 1 1",
#"1 1 1 0 1 0 1 2 1 32 256 256 3 3 100 100 1 1 1 1 1 1 1 1"
"1 1 1 0 1 0 0 2 32 32 4 4 3 3 200 200 1 1 1 1 1 1 1 1",
"1 1 1 0 1 0 0 2 32 32 8 8 3 3 200 200 2 2 1 1 1 1 1 1",
#"1 1 1 0 1 0 0 2 32 32 8 8 3 3 100 100 1 2 1 1 1 1 1 1",
"1 1 1 0 1 0 0 2 1 32 2376 256 3 3 100 100 1 1 1 1 1 1 1 1",
#"1 1 1 0 1 0 0 2 1 32 256 256 3 3 100 100 1 1 1 1 1 1 1 1"
]
baseline_instances = [