mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
Ck tile/gemm perf measure (#1750)
* Finished adding the performance benchmark for ck tile gemm
* Fix the executable rename problem
* fix the executable name error
* delete the unsupported layout combinations
* Update run_full_test.sh
* Update benchmark_mem_pipeline.sh
* Update benchmark_basic.sh
* change the executable of gemm_universal
* change ck_tile_gemm script permissions
* Addressed the comment
* Addressed the comment
* Fixed the comments
* Fixed Comment
* roll back the malfunctioning change
* Fix the Typo
* finalize the tile_gemm_fp16 performance monitoring
* fix the stash names for ck_tile gemm logs
* change the stashing logic
* change stashing syntax
---------
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
[ROCm/composable_kernel commit: 73a076eee1]
This commit is contained in:
@@ -1,2 +1,2 @@
|
||||
add_executable(tile_example_gemm_basic EXCLUDE_FROM_ALL gemm_basic.cpp)
|
||||
add_executable(tile_example_universal_gemm EXCLUDE_FROM_ALL universal_gemm.cpp)
|
||||
add_executable(tile_example_gemm_universal EXCLUDE_FROM_ALL universal_gemm.cpp)
|
||||
|
||||
@@ -11,9 +11,9 @@ sh ../script/cmake-ck-dev.sh ../ <arch>
|
||||
# The basic pipeline method on the gemm calculation
|
||||
make tile_example_gemm_basic -j
|
||||
# The memory-bound pipeline for the gemm calculation
|
||||
make tile_example_gemm_mem_pipeline -j
|
||||
make tile_example_gemm_universal -j
|
||||
```
|
||||
This will result in an executable `build/bin/tile_example_gemm_basic`
|
||||
This will result in the executables `build/bin/tile_example_gemm_basic` and `build/bin/tile_example_gemm_universal`
|
||||
|
||||
## example
|
||||
```
|
||||
@@ -22,6 +22,9 @@ args:
|
||||
-m m dimension (default:1024)
|
||||
-n n dimension (default:2048)
|
||||
-k k dimension (default:64)
|
||||
-a_layout Tensor A data layout (default: R)
|
||||
-b_layout Tensor B data layout (default: R)
|
||||
-c_layout Tensor C data layout (default: R)
|
||||
-stride_a Tensor A stride (default:0)
|
||||
-stride_b Tensor B stride (default:0)
|
||||
-stride_c Tensor C stride (default:0)
|
||||
|
||||
13
example/ck_tile/03_gemm/script/benchmark_basic.sh
Executable file
13
example/ck_tile/03_gemm/script/benchmark_basic.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/sh
|
||||
EXE="$(find . -name tile_example_gemm_basic -type f | head -n 1)"
|
||||
VALID=0
|
||||
|
||||
for b_matrix_layout in "R" "C"; do
|
||||
for m in "64" "512" "1024" "2048"; do
|
||||
for n in "512" "1024" "2048"; do
|
||||
for k in "64" "512" "1024" "2048"; do
|
||||
$EXE -prec=fp16 -b=1 -m=$m -n=$n -k=$k -a_layout="R" -b_layout="$b_matrix_layout" -c_layout="R" -v=$VALID
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
13
example/ck_tile/03_gemm/script/benchmark_mem_pipeline.sh
Executable file
13
example/ck_tile/03_gemm/script/benchmark_mem_pipeline.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/sh
|
||||
EXE="$(find . -name tile_example_gemm_universal -type f | head -n 1)"
|
||||
VALID=0
|
||||
|
||||
for b_matrix_layout in "R" "C"; do
|
||||
for m in "64" "512" "1024" "2048"; do
|
||||
for n in "512" "1024" "2048"; do
|
||||
for k in "64" "512" "1024" "2048"; do
|
||||
$EXE -prec=fp16 -b=1 -m=$m -n=$n -k=$k -a_layout="R" -b_layout="$b_matrix_layout" -c_layout="R" -v=$VALID
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
@@ -19,7 +19,27 @@ echo 'Host name: ' $host_name
|
||||
export GPU_arch=$4
|
||||
echo 'GPU_arch: ' $GPU_arch
|
||||
|
||||
# run verification tests
|
||||
example/ck_tile/03_gemm/script/smoke_test.sh
|
||||
function print_log_header(){
|
||||
rm -f $1;
|
||||
echo 'On branch ' $3 &> $1;
|
||||
echo 'Node name: ' $4 >> $1;
|
||||
# get GPU architecture and compute units from rocminfo
|
||||
echo -n "GPU_arch: " >> $1; rocminfo | grep "Name:" | grep "gfx" >> $1;
|
||||
rocminfo | grep "Compute Unit:" >> $1;
|
||||
hipcc --version | grep -e 'HIP version' >> $1;
|
||||
echo 'Environment type: ' $2 >> $1;
|
||||
/opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> $1;
|
||||
}
|
||||
|
||||
# We do not have a performance benchmark for gemm yet. Will add it in the future.
|
||||
# run verification tests
|
||||
example/ck_tile/03_gemm/script/smoke_test_basic.sh
|
||||
example/ck_tile/03_gemm/script/smoke_test_mem_pipeline.sh
|
||||
|
||||
# run performance benchmarks
|
||||
export gemm_basic_log="perf_tile_gemm_basic_fp16_$GPU_arch.log"
|
||||
print_log_header $gemm_basic_log $env_type $branch $host_name
|
||||
example/ck_tile/03_gemm/script/benchmark_basic.sh 2>&1 | tee -a $gemm_basic_log
|
||||
|
||||
export gemm_mem_pipeline_log="perf_tile_gemm_mem_pipeline_fp16_$GPU_arch.log"
|
||||
print_log_header $gemm_mem_pipeline_log $env_type $branch $host_name
|
||||
example/ck_tile/03_gemm/script/benchmark_mem_pipeline.sh 2>&1 | tee -a $gemm_mem_pipeline_log
|
||||
|
||||
@@ -32,4 +32,4 @@ set -x
|
||||
|
||||
run_fp16_tests
|
||||
|
||||
set +x
|
||||
set +x
|
||||
35
example/ck_tile/03_gemm/script/smoke_test_mem_pipeline.sh
Executable file
35
example/ck_tile/03_gemm/script/smoke_test_mem_pipeline.sh
Executable file
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
EXE="$(find . -name tile_example_gemm_universal -type f | head -n 1)"
|
||||
KNAME=1
|
||||
|
||||
export CK_WARMUP=0
|
||||
export CK_REPEAT=1
|
||||
|
||||
COMMON_ARGS='-v=2 -warmup=0 -repeat=1'
|
||||
|
||||
run_fp16_tests() {
|
||||
for batch in 1 2; do
|
||||
for m in 128 1024; do
|
||||
for n in 128 2048; do
|
||||
for k in 32 64; do
|
||||
|
||||
$EXE -b=$batch -m=$m -n=$n -k=$k -stride_a=0 -stride_b=0 -stride_c=0 -e=1e-5 -prec=fp16 $COMMON_ARGS
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "Success: Test with batch=$batch, m=$m, n=$n, k=$k executed successfully."
|
||||
else
|
||||
echo "Error: Test with batch=$batch, m=$m, n=$n, k=$k failed to execute properly."
|
||||
# Optionally, exit or break if you need to halt further execution
|
||||
# exit 1
|
||||
fi
|
||||
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
}
|
||||
|
||||
set -x
|
||||
|
||||
run_fp16_tests
|
||||
|
||||
set +x
|
||||
Reference in New Issue
Block a user