Ck tile/gemm perf measure (#1750)

* Finished adding the performance benchmark for ck tile gemm

* Fix the executable rename problem

* fix the executable name error

* delete the unsupported layout combinations

* Update run_full_test.sh

* Update benchmark_mem_pipeline.sh

* Update benchmark_basic.sh

* change the executable of gemm_universal

* change ck_tile_gemm script permissions

* Addressed the comment

* Addressed the comment

* Fixed the comments

* Fixed Comment

* roll back the malfunctioning change

* Fix the Typo

* finalize the tile_gemm_fp16 performance monitoring

* fix the stash names for ck_tile gemm logs

* change the stashing logic

* change stashing syntax

---------

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>

[ROCm/composable_kernel commit: 73a076eee1]
This commit is contained in:
Thomas Ning
2025-01-09 17:41:49 -08:00
committed by GitHub
parent 6bc57cf274
commit dc1b18eebf
11 changed files with 178 additions and 13 deletions

View File

@@ -149,6 +149,12 @@ def parse_logfile(logfile):
lst=line.split()
line_dict=dict(zip(lst[1:],lst))
res.append(line_dict['TFlops,'])
elif 'perf_tile_gemm_basic' in logfile or 'perf_tile_gemm_mem_pipeline' in logfile:
for line in open(logfile):
if 'TFlops' in line:
lst=line.split()
line_dict=dict(zip(lst[1:],lst))
res.append(line_dict['TFlops,'])
return res
@@ -330,6 +336,14 @@ def main():
for i in range(1,len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_fmha_bwd_tflops"
if 'gemm_basic_fp16' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_basic_fp16_tflops"
if 'gemm_mem_pipeline_fp16' in filename:
for i in range(1, len(results)+1):
testlist.append("Test%i"%i)
table_name="ck_tile_gemm_mem_pipeline_fp16_tflops"
tflops_base = get_baseline(table_name,conn)
store_new_test_result(table_name, results, testlist, branch_name, node_id, gpu_arch, compute_units, rocm_vers, hip_vers, environment, sqlEngine)

View File

@@ -43,3 +43,19 @@ file=./perf_fmha_bwd_gfx90a.log
if [ -e "$file" ]; then
python3 process_perf_data.py perf_fmha_bwd_gfx90a.log
fi
# Hand each ck_tile GEMM fp16 log (basic and mem_pipeline variants, gfx942 and
# gfx90a) to process_perf_data.py. Logs that do not exist are skipped.
for file in \
    ./perf_tile_gemm_basic_fp16_gfx942.log \
    ./perf_tile_gemm_basic_fp16_gfx90a.log \
    ./perf_tile_gemm_mem_pipeline_fp16_gfx942.log \
    ./perf_tile_gemm_mem_pipeline_fp16_gfx90a.log
do
    if [ -e "$file" ]; then
        # The script receives the bare file name, without the leading "./".
        python3 process_perf_data.py "${file#./}"
    fi
done

View File

@@ -52,3 +52,19 @@ file=./perf_fmha_bwd_gfx90a.log
if [ -e "$file" ]; then
python3 process_perf_data.py perf_fmha_bwd_gfx90a.log
fi
# Hand each ck_tile GEMM fp16 log to process_perf_data.py; logs that do not
# exist are skipped.
#
# The file names must contain "perf_tile_gemm_basic" / "perf_tile_gemm_mem_pipeline"
# and "gemm_basic_fp16" / "gemm_mem_pipeline_fp16": process_perf_data.py tests
# for those substrings when it parses the log and when it picks the results
# table. The previous names (perf_gemm_basic_*.log, perf_gemm_mem_pipeline_*.log)
# matched neither test.
# NOTE(review): names are aligned with the sibling processing script; confirm
# they match what the benchmark runner actually writes.
file=./perf_tile_gemm_basic_fp16_gfx942.log
if [ -e "$file" ]; then
    python3 process_perf_data.py perf_tile_gemm_basic_fp16_gfx942.log
fi
file=./perf_tile_gemm_basic_fp16_gfx90a.log
if [ -e "$file" ]; then
    python3 process_perf_data.py perf_tile_gemm_basic_fp16_gfx90a.log
fi
file=./perf_tile_gemm_mem_pipeline_fp16_gfx942.log
if [ -e "$file" ]; then
    python3 process_perf_data.py perf_tile_gemm_mem_pipeline_fp16_gfx942.log
fi
file=./perf_tile_gemm_mem_pipeline_fp16_gfx90a.log
if [ -e "$file" ]; then
    python3 process_perf_data.py perf_tile_gemm_mem_pipeline_fp16_gfx90a.log
fi