diff --git a/script/profile_splitK_gemm.sh b/script/profile_splitK_gemm.sh
new file mode 100755
index 0000000000..d62f0e4753
--- /dev/null
+++ b/script/profile_splitK_gemm.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Runs ckProfiler for the given op over a fixed list of GEMM problem sizes.
+#
+# Usage: profile_splitK_gemm.sh OP DATATYPE LAYOUT VERIFY INIT LOG TIME KBATCH
+#
+# OP..TIME are forwarded unchanged as the leading arguments of every call and
+# KBATCH as the trailing one; M, N, K and the strides are set per case below.
+# DRIVER is a relative path, resolved against the current working directory.
+
+[ "$#" -ge 8 ] || { printf 'Usage: %s OP DATATYPE LAYOUT VERIFY INIT LOG TIME KBATCH\n' "$0" >&2; exit 1; }
+
+## GPU visibility
+export HIP_VISIBLE_DEVICES=0
+DRIVER="../build/bin/ckProfiler"
+echo "$DRIVER"
+OP=$1
+DATATYPE=$2
+LAYOUT=$3
+VERIFY=$4
+INIT=$5
+LOG=$6
+TIME=$7
+KBatch=$8
+
+
+# 120 CU
+########  op    datatype    layout    verify    init    log    time    M___ N___ K___ StrideA StrideB StrideC KBatch_
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME"  960 1024 1024      -1      -1      -1 "$KBatch"
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME"  960 2048 2048      -1      -1      -1 "$KBatch"
+
+# 104 CU
+########  op    datatype    layout    verify    init    log    time    M___ N___ K___ StrideA StrideB StrideC KBatch_
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME"  832 1024 1024      -1      -1      -1 "$KBatch"
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME"  832 2048 2048      -1      -1      -1 "$KBatch"
+
+# 110 CU
+########  op    datatype    layout    verify    init    log    time    M___ N___ K___ StrideA StrideB StrideC KBatch_
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME" 1280 1408 1024      -1      -1      -1 "$KBatch"
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME" 1280 2816 2048      -1      -1      -1 "$KBatch"
+
+# testing different strides
+########  op    datatype    layout    verify    init    log    time    M___ N___ K___ StrideA StrideB StrideC KBatch_
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME" 1024 1024 1024    1024    1024    1024 "$KBatch"
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME" 2048 2048 2048    2048    2048    2048 "$KBatch"
+
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME" 1024 1024 1024    1056    1056    1056 "$KBatch"
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME" 2048 2048 2048    2080    2080    2080 "$KBatch"
+
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME" 1024 1024 1024    1088    1088    1088 "$KBatch"
+"$DRIVER" "$OP" "$DATATYPE" "$LAYOUT" "$VERIFY" "$INIT" "$LOG" "$TIME" 2048 2048 2048    2112    2112    2112 "$KBatch"
diff --git a/script/run_full_performance_tests.sh b/script/run_full_performance_tests.sh
index f0eeb31f88..bd2d48b668 100755
--- a/script/run_full_performance_tests.sh
+++ b/script/run_full_performance_tests.sh
@@ -122,3 +122,19 @@ export reduction_log="perf_reduction_${gpu_arch}.log"
 print_log_header $reduction_log $env_type $branch $host_name
 ./profile_reduce_with_index.sh $verify 2 10 --half | tee -a $reduction_log
 ./profile_reduce_no_index.sh $verify 2 10 --half | tee -a $reduction_log
+
+#run splitK_gemm tests
+#arguments: op datatype layout verify init log time KBatch
+export splitK_gemm_log="perf_splitK_gemm_${gpu_arch}.log"
+print_log_header $splitK_gemm_log $env_type $branch $host_name
+
+#datatype 0 runs are currently disabled
+#./profile_splitK_gemm.sh gemm_splitk 0 0 "$verify" 1 0 1 4 | tee -a "$splitK_gemm_log"
+#./profile_splitK_gemm.sh gemm_splitk 0 1 "$verify" 1 0 1 4 | tee -a "$splitK_gemm_log"
+#./profile_splitK_gemm.sh gemm_splitk 0 2 "$verify" 1 0 1 4 | tee -a "$splitK_gemm_log"
+#./profile_splitK_gemm.sh gemm_splitk 0 3 "$verify" 1 0 1 4 | tee -a "$splitK_gemm_log"
+
+./profile_splitK_gemm.sh gemm_splitk 1 0 "$verify" 1 0 1 4 | tee -a "$splitK_gemm_log"
+./profile_splitK_gemm.sh gemm_splitk 1 1 "$verify" 1 0 1 4 | tee -a "$splitK_gemm_log"
+./profile_splitK_gemm.sh gemm_splitk 1 2 "$verify" 1 0 1 4 | tee -a "$splitK_gemm_log"
+./profile_splitK_gemm.sh gemm_splitk 1 3 "$verify" 1 0 1 4 | tee -a "$splitK_gemm_log"