mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 11:16:59 +00:00
[CK] Add test script for all ckProfiler ops
1. Add test script for all ckProfiler ops 2. Extend script run_full_performance_tests.sh to include all ops.
This commit is contained in:
15
script/profile_avg_pool2d_bwd.sh
Executable file
15
script/profile_avg_pool2d_bwd.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
|
||||
######## op datatype verify init log time length(NCHW) window size(YX) stride dilation left pad right pad
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 32 30 30 --wsize 2 2 --wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1 --dmmy 28 29 30 31 32
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 64 256 64 64 --wsize 2 2 --wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1 --dmmy 28 29 30 31 32
|
||||
|
||||
15
script/profile_avg_pool3d_bwd.sh
Executable file
15
script/profile_avg_pool3d_bwd.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
|
||||
######## op datatype verify init log time length(NCDHW) window size(ZYX) stride dilation left pad right pad
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 32 30 30 30 --wsize 2 2 2 --wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 64 64 64 64 64 --wsize 2 2 2 --wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1
|
||||
|
||||
20
script/profile_batched_gemm_b_scale.sh
Executable file
20
script/profile_batched_gemm_b_scale.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout B_block_tile verify init log time M___ N___ K___ StrideA StrideB StrideC BatchCount KSplit
|
||||
$DRIVER $OP $DATATYPE $LAYOUT 1 $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1 8 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT 1 $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1 8 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT 1 $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1 4 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT 1 $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1 2 1
|
||||
|
||||
##todo: B_block_tile is incorrect in profile_batched_gemm_b_scale
|
||||
17
script/profile_batched_gemm_gemm.sh
Executable file
17
script/profile_batched_gemm_gemm.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ O__ BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 1024 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 2048 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 4096 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 8192 2
|
||||
37
script/profile_batched_gemm_reduce.sh
Executable file
37
script/profile_batched_gemm_reduce.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
|
||||
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1 2
|
||||
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1024 1024 1024 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2048 2048 2048 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4096 4096 4096 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8192 8192 8192 2
|
||||
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1056 1056 1056 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2080 2080 2080 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4128 4128 4128 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8224 8224 8224 2
|
||||
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchCount
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1088 1088 1088 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2112 2112 2112 8
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4160 4160 4160 4
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8256 8256 8256 2
|
||||
13
script/profile_bnorm.sh
Executable file
13
script/profile_bnorm.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY="-v $3"
|
||||
INIT=$4
|
||||
TIME=$5
|
||||
USE=$6
|
||||
|
||||
######## op datatype UseSavedMean init time inOutLengths(nhwc) reduceDims verify
|
||||
$DRIVER $OP $DATATYPE $USE $INIT $TIME -D 64,64,280,82 -R 1,2,3 $VERIFY
|
||||
14
script/profile_bnorm_fwd.sh
Executable file
14
script/profile_bnorm_fwd.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY="-v $3"
|
||||
INIT=$4
|
||||
TIME=$5
|
||||
USE=$6
|
||||
|
||||
######## op datatype updateMovingAverage saveMeanAndInvVariance init time inOutLengths(nhwc) reduceDims verify
|
||||
$DRIVER $OP $DATATYPE $USE 0 $INIT $TIME -D 64,64,280,82 -R 1,2,3 $VERIFY
|
||||
$DRIVER $OP $DATATYPE $USE 1 $INIT $TIME -D 64,64,280,82 -R 1,2,3 $VERIFY
|
||||
15
script/profile_contraction_bilinear.sh
Executable file
15
script/profile_contraction_bilinear.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
|
||||
######## op datatype compute_datatype num_dim layout verify init log time alpha beta M0 M1 N0 N1 K0 K1
|
||||
$DRIVER $OP $DATATYPE $DATATYPE 2 $LAYOUT $VERIFY $INIT $LOG $TIME 1.0 1.0 128 128 128 128 128 128
|
||||
15
script/profile_contraction_scale.sh
Executable file
15
script/profile_contraction_scale.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
######## op datatype compute_datatype num_dim layout verify init log time alpha M0 M1 N0 N1 K0 K1
|
||||
$DRIVER $OP $DATATYPE $DATATYPE 2 $LAYOUT $VERIFY $INIT $LOG $TIME 1.0 128 128 128 128 128 128
|
||||
|
||||
34
script/profile_conv.sh
Executable file
34
script/profile_conv.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
# Pin the run to the first GPU, as the sibling profile_*.sh scripts do;
# without this the "GPU visibility" section above was empty.
export HIP_VISIBLE_DEVICES=0
# Path to the profiler binary, relative to the directory the script is run from.
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
N=$8
|
||||
|
||||
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
19
script/profile_conv_fwd_bias_relu_add.sh
Executable file
19
script/profile_conv_fwd_bias_relu_add.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
N=$8
|
||||
######## op datatype layout layout layout verify init log time N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $LAYOUT $LAYOUT $VERIFY $INIT $LOG $TIME $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $LAYOUT $LAYOUT $VERIFY $INIT $LOG $TIME $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $LAYOUT $LAYOUT $VERIFY $INIT $LOG $TIME $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $LAYOUT $LAYOUT $VERIFY $INIT $LOG $TIME $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $LAYOUT $LAYOUT $VERIFY $INIT $LOG $TIME $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
21
script/profile_conv_tensor_rearrange.sh
Executable file
21
script/profile_conv_tensor_rearrange.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
OPTYPE=$8
|
||||
N=$9
|
||||
######## op datatype layout verify init log time op_type Dim G N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME $OPTYPE 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME $OPTYPE 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME $OPTYPE 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME $OPTYPE 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME $OPTYPE 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
|
||||
18
script/profile_gemm_b_scale.sh
Executable file
18
script/profile_gemm_b_scale.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
KBatch=$8
|
||||
|
||||
######## op datatype layout B_block_tile verify init log time M___ N___ K___ StrideA StrideB StrideC KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT 1 $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT 1 $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT 1 $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT 1 $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1 $KBatch
|
||||
19
script/profile_gemm_d0_d1_e.sh
Executable file
19
script/profile_gemm_d0_d1_e.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideD0 StrideD1 StrideE
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 1408 1024 -1 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 2816 2048 -1 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2560 1408 2048 -1 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2560 2816 2048 -1 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 5120 5632 4096 -1 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7040 8192 8192 -1 -1 -1 -1 -1
|
||||
19
script/profile_gemm_d0_e.sh
Executable file
19
script/profile_gemm_d0_e.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideD0 StrideE
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 1408 1024 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 2816 2048 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2560 1408 2048 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2560 2816 2048 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 5120 5632 4096 -1 -1 -1 -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7040 8192 8192 -1 -1 -1 -1
|
||||
36
script/profile_gemm_universal_batched.sh
Executable file
36
script/profile_gemm_universal_batched.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
KBatch=$8
|
||||
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1 -1 -1 -1 8 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1 -1 -1 -1 8 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1 -1 -1 -1 4 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1 -1 -1 -1 2 $KBatch
|
||||
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1024 1024 1024 -1 -1 -1 8 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2048 2048 2048 -1 -1 -1 8 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4096 4096 4096 -1 -1 -1 4 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8192 8192 8192 -1 -1 -1 2 $KBatch
|
||||
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1056 1056 1056 -1 -1 -1 8 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2080 2080 2080 -1 -1 -1 8 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4128 4128 4128 -1 -1 -1 4 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8224 8224 8224 -1 -1 -1 2 $KBatch
|
||||
|
||||
####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1088 1088 1088 -1 -1 -1 8 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2112 2112 2112 -1 -1 -1 8 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4160 4160 4160 -1 -1 -1 4 $KBatch
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8256 8256 8256 -1 -1 -1 2 $KBatch
|
||||
40
script/profile_gemm_universal_streamk.sh
Executable file
40
script/profile_gemm_universal_streamk.sh
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
STRATEGY=$8
|
||||
|
||||
|
||||
# 120 CU
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC STRATEGY_ GridSize
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1 $STRATEGY -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 2048 2048 -1 -1 -1 $STRATEGY -1
|
||||
|
||||
# 104 CU
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC STRATEGY_ GridSize
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 832 1024 1024 -1 -1 -1 $STRATEGY -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 832 2048 2048 -1 -1 -1 $STRATEGY -1
|
||||
|
||||
# 110 CU
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC STRATEGY_ GridSize
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 1408 1024 -1 -1 -1 $STRATEGY -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 2816 2048 -1 -1 -1 $STRATEGY -1
|
||||
|
||||
# testing different strides
|
||||
######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC STRATEGY_ GridSize
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1024 1024 1024 $STRATEGY -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2048 2048 2048 $STRATEGY -1
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1056 1056 1056 $STRATEGY -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2080 2080 2080 $STRATEGY -1
|
||||
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1088 1088 1088 $STRATEGY -1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2112 2112 2112 $STRATEGY -1
|
||||
|
||||
@@ -15,24 +15,24 @@ TIME=$7
|
||||
N=$8
|
||||
|
||||
# Resnet50
|
||||
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
|
||||
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads KSplit
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 1
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3 1
|
||||
|
||||
17
script/profile_grouped_gemm_fixed_nk.sh
Executable file
17
script/profile_grouped_gemm_fixed_nk.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
LAYOUT=$3
|
||||
VERIFY=$4
|
||||
INIT=$5
|
||||
LOG=$6
|
||||
TIME=$7
|
||||
|
||||
######## op datatype layout verify init log time Ms______________ Ns______________ Ks_____________ StrideAs___________ StrideBs__________ StrideCs___________
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960,960,960,960 1024,1024,1024,1024 1024,1024,1024,1024 960,960,960,960 1024,1024,1024,1024 1024,1024,1024,1024
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960,960,960,960 2048,2048,2048,2048 2048,2048,2048,2048 960,960,960,960 2048,2048,2048,2048 2048,2048,2048,2048
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840,3840,3840,3840 4096,4096,4096,4096 4096,4096,4096,4096 3840,3840,3840,3840 4096,4096,4096,4096 4096,4096,4096,4096
|
||||
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680,7680,7680,7680 8192,8192,8192,8192 8192,8192,8192,8192 7680,7680,7680,7680 8192,8192,8192,8192 8192,8192,8192,8192
|
||||
15
script/profile_groupnorm.sh
Executable file
15
script/profile_groupnorm.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
|
||||
######## op datatype verify init log time length
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 1 16 16 32 40
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 64 64 64 64
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 64 64 128 128
|
||||
15
script/profile_layernorm.sh
Executable file
15
script/profile_layernorm.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
|
||||
######## op datatype verify init log time length
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 256 256
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 1024 1024
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 4096 4096
|
||||
14
script/profile_max_pool2d_bwd.sh
Executable file
14
script/profile_max_pool2d_bwd.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
|
||||
######## op datatype verify init log time length(NCHW) window size(YX) stride dilation left pad right pad
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 32 30 30 --wsize 2 2 --wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1 --dmmy 28 29 30 31 32
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 64 32 256 256 --wsize 2 2 --wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1 --dmmy 28 29 30 31 32
|
||||
16
script/profile_max_pool2d_fwd.sh
Executable file
16
script/profile_max_pool2d_fwd.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
|
||||
######## op datatype verify init log time return_idx length(NCHW) window size(YX) stride dilation left pad right pad
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME 0 --length 2 32 30 30 --wsize 2 2 --wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME 0 --length 64 32 256 256 --wsize 2 2 --wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME 1 --length 2 32 30 30 --wsize 2 2 --wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME 1 --length 64 32 256 256 --wsize 2 2 --wstride 2 2 --wdilation 1 1 --pad1 1 1 --pad2 1 1
|
||||
14
script/profile_max_pool3d.sh
Executable file
14
script/profile_max_pool3d.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
|
||||
######## op datatype verify init log time length(NCDHW) window size(ZYX) stride dilation left pad right pad
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 32 30 30 30 --wsize 2 2 2 --wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 8 16 32 256 256 --wsize 2 2 2 --wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1
|
||||
17
script/profile_pool3d_fwd.sh
Executable file
17
script/profile_pool3d_fwd.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
REDUCEOP=$7
|
||||
|
||||
######## op datatype verify init log time return_index reduce_op length(NCDHW) window size(ZYX) stride dilation left pad right pad
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME 0 $REDUCEOP --length 2 32 30 30 30 --wsize 2 2 2 --wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME 0 $REDUCEOP --length 8 16 32 256 256 --wsize 2 2 2 --wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME 1 $REDUCEOP --length 2 32 30 30 30 --wsize 2 2 2 --wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME 1 $REDUCEOP --length 8 16 32 256 256 --wsize 2 2 2 --wstride 2 2 2 --wdilation 1 1 1 --pad1 1 1 1 --pad2 1 1 1
|
||||
18
script/profile_softmax.sh
Executable file
18
script/profile_softmax.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
## GPU visibility
|
||||
export HIP_VISIBLE_DEVICES=0
|
||||
DRIVER="../build/bin/ckProfiler"
|
||||
OP=$1
|
||||
DATATYPE=$2
|
||||
VERIFY=$3
|
||||
INIT=$4
|
||||
LOG=$5
|
||||
TIME=$6
|
||||
|
||||
######## op datatype verify init log time length stride reduce
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 8 4 256 --stride 1024 256 1 --reduce 2
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 8 128 1024 --stride 2097152 1048576 131072 1 --reduce 2
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 8 128 1024 --stride 2097152 1048576 131072 1 --reduce 3
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 32 512 4096 --stride 134217728 67108864 2097152 1 --reduce 2
|
||||
$DRIVER $OP $DATATYPE $VERIFY $INIT $LOG $TIME --length 2 32 512 4096 --stride 134217728 67108864 2097152 1 --reduce 3
|
||||
|
||||
13
script/profile_transpose.sh
Executable file
13
script/profile_transpose.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Runs ckProfiler once for the op named by $1 on a single fixed tensor shape.
# Usage: profile_transpose.sh <op> <datatype> <verify> <init> <log> <time>

## GPU visibility
export HIP_VISIBLE_DEVICES=0
DRIVER="../build/bin/ckProfiler"

# The six positional arguments are forwarded to the profiler in this order.
OP=$1
DATATYPE=$2
VERIFY=$3
INIT=$4
LOG=$5
TIME=$6

# Tensor extents appended after the shared arguments, in N C D H W order.
dims=(4 8 8 512 512)

######## op  datatype  verify  init  log  time  N  C  D  H  W
"$DRIVER" "$OP" "$DATATYPE" "$VERIFY" "$INIT" "$LOG" "$TIME" "${dims[@]}"
|
||||
@@ -12,6 +12,8 @@
|
||||
# environment tag : a string describing the specifics of your test environment
|
||||
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
|
||||
# node name : $hostname
|
||||
# extended op = 0 : do not include extended op (default)
|
||||
# = 1 : include extended op
|
||||
|
||||
#get the command line arguments:
|
||||
export verify=$1
|
||||
@@ -22,16 +24,19 @@ export branch=$3
|
||||
echo 'Branch name: ' $branch
|
||||
export host_name=$4
|
||||
echo 'Host name: ' $host_name
|
||||
# Optional 5th argument: 1 = also run the extended op tests. Falls back to 0
# when omitted, matching the default documented in the usage header.
# The variable keeps its original spelling because the rest of the script
# reads it under this name.
export inlcude_extended_op=${5:-0}
echo 'Include extended op: ' "$inlcude_extended_op"
|
||||
|
||||
# Start a fresh log file and write a header describing the run environment.
# Arguments:
#   $1 - path of the log file (any existing file is removed first)
#   $2 - environment tag, written on the 'Environment type' line
#   $3 - branch name, written on the 'On branch' line
#   $4 - node name, written on the 'Node name' line
# Outputs:
#   Appends the output of rocminfo, hipcc and amdclang++ (filtered with grep)
#   to the log file; nothing is written to stdout.
function print_log_header(){
    local log_file=$1
    local env_tag=$2
    local branch_name=$3
    local node_name=$4

    rm -f -- "$log_file"
    echo 'On branch ' "$branch_name" &> "$log_file"
    echo 'Node name: ' "$node_name" >> "$log_file"
    #get GPU_arch and number of compute units from rocminfo
    # The label is written without a newline so the first matching rocminfo
    # line continues on the same line.
    echo -n "GPU_arch: " >> "$log_file"
    rocminfo | grep "Name:" | grep "gfx" >> "$log_file"
    rocminfo | grep "Compute Unit:" >> "$log_file"
    hipcc --version | grep -e 'HIP version' >> "$log_file"
    echo 'Environment type: ' "$env_tag" >> "$log_file"
    /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> "$log_file"
}
|
||||
|
||||
#run gemm tests
|
||||
@@ -103,6 +108,9 @@ print_log_header $grouped_conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 0 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 2 0 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_bwd_data_log
|
||||
|
||||
#run grouped_bwd_weight tests
|
||||
export grouped_conv_bwd_weight_log="perf_grouped_conv_bwd_weight.log"
|
||||
@@ -148,4 +156,472 @@ print_log_header $onnx_log $env_type $branch $host_name
|
||||
export mixed_gemm_log="perf_mixed_gemm.log"
|
||||
print_log_header $mixed_gemm_log $env_type $branch $host_name
|
||||
./profile_mixed_gemm.sh gemm_splitk 4 0 $verify 2 0 1 16 2>&1 | tee -a $mixed_gemm_log
|
||||
./profile_mixed_gemm.sh gemm_splitk 5 0 $verify 2 0 1 16 2>&1 | tee -a $mixed_gemm_log
|
||||
./profile_mixed_gemm.sh gemm_splitk 5 0 $verify 2 0 1 16 2>&1 | tee -a $mixed_gemm_log
|
||||
|
||||
if [ "$inlcude_extended_op" = "1" ]; then
|
||||
#run batched_gemm_add_relu_gemm_add tests
|
||||
export batched_gemm_add_relu_gemm_add_log="perf_batched_gemm_add_relu_gemm_add.log"
|
||||
print_log_header $batched_gemm_add_relu_gemm_add_log $env_type $branch $host_name
|
||||
./profile_batched_gemm_gemm.sh batched_gemm_add_relu_gemm_add 1 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_add_relu_gemm_add_log
|
||||
./profile_batched_gemm_gemm.sh batched_gemm_add_relu_gemm_add 1 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_add_relu_gemm_add_log
|
||||
|
||||
#run batched_gemm_b_scale tests
|
||||
export batched_gemm_b_scale_log="perf_batched_gemm_b_scale.log"
|
||||
print_log_header $batched_gemm_b_scale_log $env_type $branch $host_name
|
||||
./profile_batched_gemm_b_scale.sh batched_gemm_b_scale 8 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_b_scale_log
|
||||
|
||||
#run batched_gemm_gemm tests
|
||||
export batched_gemm_gemm_log="perf_batched_gemm_gemm.log"
|
||||
print_log_header $batched_gemm_gemm_log $env_type $branch $host_name
|
||||
./profile_batched_gemm_gemm.sh batched_gemm_gemm 1 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_gemm_log
|
||||
./profile_batched_gemm_gemm.sh batched_gemm_gemm 1 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_gemm_log
|
||||
|
||||
#run batched_gemm_multi_d tests
|
||||
export batched_gemm_multi_d_log="perf_batched_gemm_multi_d.log"
|
||||
print_log_header $batched_gemm_multi_d_log $env_type $branch $host_name
|
||||
./profile_batched_gemm.sh batched_gemm_multi_d 0 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_multi_d_log
|
||||
./profile_batched_gemm.sh batched_gemm_multi_d 0 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_multi_d_log
|
||||
./profile_batched_gemm.sh batched_gemm_multi_d 0 2 $verify 1 0 1 2>&1 | tee -a $batched_gemm_multi_d_log
|
||||
./profile_batched_gemm.sh batched_gemm_multi_d 0 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_multi_d_log
|
||||
./profile_batched_gemm.sh batched_gemm_multi_d 1 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_multi_d_log
|
||||
./profile_batched_gemm.sh batched_gemm_multi_d 1 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_multi_d_log
|
||||
./profile_batched_gemm.sh batched_gemm_multi_d 1 2 $verify 1 0 1 2>&1 | tee -a $batched_gemm_multi_d_log
|
||||
./profile_batched_gemm.sh batched_gemm_multi_d 1 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_multi_d_log
|
||||
|
||||
#run batched_gemm_reduce tests
|
||||
export batched_gemm_reduce_log="perf_batched_gemm_reduce.log"
|
||||
print_log_header $batched_gemm_reduce_log $env_type $branch $host_name
|
||||
./profile_batched_gemm_reduce.sh batched_gemm_reduce 1 0 $verify 1 0 1 2>&1 | tee -a $batched_gemm_reduce_log
|
||||
./profile_batched_gemm_reduce.sh batched_gemm_reduce 1 1 $verify 1 0 1 2>&1 | tee -a $batched_gemm_reduce_log
|
||||
./profile_batched_gemm_reduce.sh batched_gemm_reduce 1 2 $verify 1 0 1 2>&1 | tee -a $batched_gemm_reduce_log
|
||||
./profile_batched_gemm_reduce.sh batched_gemm_reduce 1 3 $verify 1 0 1 2>&1 | tee -a $batched_gemm_reduce_log
|
||||
|
||||
#run contraction_bilinear tests
|
||||
export contraction_bilinear_log="perf_contraction_bilinear.log"
|
||||
print_log_header $contraction_bilinear_log $env_type $branch $host_name
|
||||
./profile_contraction_bilinear.sh contraction_bilinear 0 0 $verify 1 0 1 2>&1 | tee -a $contraction_bilinear_log
|
||||
./profile_contraction_bilinear.sh contraction_bilinear 1 0 $verify 1 0 1 2>&1 | tee -a $contraction_bilinear_log
|
||||
|
||||
#run contraction_scale tests
|
||||
export contraction_scale_log="perf_contraction_scale.log"
|
||||
print_log_header $contraction_scale_log $env_type $branch $host_name
|
||||
./profile_contraction_scale.sh contraction_scale 0 0 $verify 1 0 1 2>&1 | tee -a $contraction_scale_log
|
||||
./profile_contraction_scale.sh contraction_scale 1 0 $verify 1 0 1 2>&1 | tee -a $contraction_scale_log
|
||||
|
||||
#run conv_bwd_data tests
|
||||
export conv_bwd_data_log="perf_conv_bwd_data.log"
|
||||
print_log_header $conv_bwd_data_log $env_type $branch $host_name
|
||||
./profile_conv.sh conv_bwd_data 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
./profile_conv.sh conv_bwd_data 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
./profile_conv.sh conv_bwd_data 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
./profile_conv.sh conv_bwd_data 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_bwd_data_log
|
||||
|
||||
#run conv_fwd_bias_relu_add tests
|
||||
export conv_fwd_bias_relu_add_log="perf_conv_fwd_bias_relu_add.log"
|
||||
print_log_header $conv_fwd_bias_relu_add_log $env_type $branch $host_name
|
||||
./profile_conv_fwd_bias_relu_add.sh conv_fwd_bias_relu_add 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_bias_relu_add_log
|
||||
|
||||
#run conv_fwd tests
|
||||
export conv_fwd_log="perf_conv_fwd.log"
|
||||
print_log_header $conv_fwd_log $env_type $branch $host_name
|
||||
./profile_conv.sh conv_fwd 0 0 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv.sh conv_fwd 1 0 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv.sh conv_fwd 2 0 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv.sh conv_fwd 3 0 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv.sh conv_fwd 0 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv.sh conv_fwd 1 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv.sh conv_fwd 2 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
./profile_conv.sh conv_fwd 3 1 $verify 1 0 1 256 2>&1 | tee -a $conv_fwd_log
|
||||
|
||||
#run conv_tensor_rearrange tests
|
||||
export conv_tensor_rearrange_log="perf_conv_tensor_rearrange.log"
|
||||
print_log_header $conv_tensor_rearrange_log $env_type $branch $host_name
|
||||
./profile_conv_tensor_rearrange.sh conv_tensor_rearrange 0 0 $verify 1 0 1 0 256 2>&1 | tee -a $conv_tensor_rearrange_log
|
||||
./profile_conv_tensor_rearrange.sh conv_tensor_rearrange 1 0 $verify 1 0 1 0 256 2>&1 | tee -a $conv_tensor_rearrange_log
|
||||
./profile_conv_tensor_rearrange.sh conv_tensor_rearrange 2 0 $verify 1 0 1 0 256 2>&1 | tee -a $conv_tensor_rearrange_log
|
||||
./profile_conv_tensor_rearrange.sh conv_tensor_rearrange 3 0 $verify 1 0 1 0 256 2>&1 | tee -a $conv_tensor_rearrange_log
|
||||
./profile_conv_tensor_rearrange.sh conv_tensor_rearrange 1 1 $verify 1 0 1 0 256 2>&1 | tee -a $conv_tensor_rearrange_log
|
||||
./profile_conv_tensor_rearrange.sh conv_tensor_rearrange 3 1 $verify 1 0 1 0 256 2>&1 | tee -a $conv_tensor_rearrange_log
|
||||
./profile_conv_tensor_rearrange.sh conv_tensor_rearrange 1 0 $verify 1 0 1 1 256 2>&1 | tee -a $conv_tensor_rearrange_log
|
||||
./profile_conv_tensor_rearrange.sh conv_tensor_rearrange 1 1 $verify 1 0 1 1 256 2>&1 | tee -a $conv_tensor_rearrange_log
|
||||
|
||||
#run gemm_ab_scale tests
|
||||
export gemm_ab_scale_log="perf_gemm_ab_scale.log"
|
||||
print_log_header $gemm_ab_scale_log $env_type $branch $host_name
|
||||
./profile_gemm_b_scale.sh gemm_ab_scale 7 1 $verify 1 0 1 2>&1 | tee -a $gemm_ab_scale_log
|
||||
|
||||
#run gemm_add_add_fastgelu tests
|
||||
export gemm_add_add_fastgelu_log="perf_gemm_add_add_fastgelu.log"
|
||||
print_log_header $gemm_add_add_fastgelu_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_d1_e.sh gemm_add_add_fastgelu 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_add_fastgelu_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_add_add_fastgelu 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_add_add_fastgelu_log
|
||||
|
||||
#run gemm_add_fastgelu tests
|
||||
export gemm_add_fastgelu_log="perf_gemm_add_fastgelu.log"
|
||||
print_log_header $gemm_add_fastgelu_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_e.sh gemm_add_fastgelu 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_fastgelu_log
|
||||
./profile_gemm_d0_e.sh gemm_add_fastgelu 2 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_fastgelu_log
|
||||
./profile_gemm_d0_e.sh gemm_add_fastgelu 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_add_fastgelu_log
|
||||
./profile_gemm_d0_e.sh gemm_add_fastgelu 2 1 $verify 1 0 1 2>&1 | tee -a $gemm_add_fastgelu_log
|
||||
|
||||
#run gemm_add_multiply tests
|
||||
export gemm_add_multiply_log="perf_gemm_add_multiply.log"
|
||||
print_log_header $gemm_add_multiply_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_d1_e.sh gemm_add_multiply 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_multiply_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_add_multiply 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_add_multiply_log
|
||||
|
||||
#run gemm_add_relu_add_layernorm tests
|
||||
export gemm_add_relu_add_layernorm_log="perf_gemm_add_relu_add_layernorm.log"
|
||||
print_log_header $gemm_add_relu_add_layernorm_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_d1_e.sh gemm_add_relu_add_layernorm 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_relu_add_layernorm_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_add_relu_add_layernorm 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_add_relu_add_layernorm_log
|
||||
|
||||
#run gemm_add_relu tests
|
||||
export gemm_add_relu_log="perf_gemm_add_relu.log"
|
||||
print_log_header $gemm_add_relu_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_e.sh gemm_add_relu 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_relu_log
|
||||
./profile_gemm_d0_e.sh gemm_add_relu 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_relu_log
|
||||
|
||||
#run gemm_add_silu tests
|
||||
export gemm_add_silu_log="perf_gemm_add_silu.log"
|
||||
print_log_header $gemm_add_silu_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_e.sh gemm_add_silu 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_silu_log
|
||||
./profile_gemm_d0_e.sh gemm_add_silu 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_silu_log
|
||||
./profile_gemm_d0_e.sh gemm_add_silu 0 1 $verify 1 0 1 2>&1 | tee -a $gemm_add_silu_log
|
||||
./profile_gemm_d0_e.sh gemm_add_silu 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_add_silu_log
|
||||
|
||||
#run gemm_add tests
|
||||
export gemm_add_log="perf_gemm_add.log"
|
||||
print_log_header $gemm_add_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_e.sh gemm_add 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_add_log
|
||||
./profile_gemm_d0_e.sh gemm_add 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_add_log
|
||||
|
||||
#run gemm_b_scale tests
|
||||
export gemm_b_scale_log="perf_gemm_b_scale.log"
|
||||
print_log_header $gemm_b_scale_log $env_type $branch $host_name
|
||||
./profile_gemm_b_scale.sh gemm_b_scale 8 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_b_scale_log
|
||||
|
||||
#run gemm_bias_add_reduce tests
|
||||
export gemm_bias_add_reduce_log="perf_gemm_bias_add_reduce.log"
|
||||
print_log_header $gemm_bias_add_reduce_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_e.sh gemm_bias_add_reduce 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_bias_add_reduce_log
|
||||
./profile_gemm_d0_e.sh gemm_bias_add_reduce 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_bias_add_reduce_log
|
||||
|
||||
#run gemm_fastgelu tests
|
||||
export gemm_fastgelu_log="perf_gemm_fastgelu.log"
|
||||
print_log_header $gemm_fastgelu_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm_fastgelu 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_fastgelu_log
|
||||
./profile_gemm.sh gemm_fastgelu 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_fastgelu_log
|
||||
|
||||
#run gemm_multiply_add tests
|
||||
export gemm_multiply_add_log="perf_gemm_multiply_add.log"
|
||||
print_log_header $gemm_multiply_add_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_add 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_add_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_add 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_add_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_add 0 1 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_add_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_add 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_add_log
|
||||
|
||||
#run gemm_multiply_multiply_weight_preshuffle tests
|
||||
export gemm_multiply_multiply_weight_preshuffle_log="perf_gemm_multiply_multiply_weight_preshuffle.log"
|
||||
print_log_header $gemm_multiply_multiply_weight_preshuffle_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_multiply_weight_preshuffle 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_multiply_weight_preshuffle_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_multiply_weight_preshuffle 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_multiply_weight_preshuffle_log
|
||||
|
||||
#run gemm_multiply_multiply tests
|
||||
export gemm_multiply_multiply_log="perf_gemm_multiply_multiply.log"
|
||||
print_log_header $gemm_multiply_multiply_log $env_type $branch $host_name
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_multiply 7 1 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_multiply_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_multiply 9 1 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_multiply_log
|
||||
./profile_gemm_d0_d1_e.sh gemm_multiply_multiply 10 1 $verify 1 0 1 2>&1 | tee -a $gemm_multiply_multiply_log
|
||||
|
||||
#run gemm_reduce tests
|
||||
export gemm_reduce_log="perf_gemm_reduce.log"
|
||||
print_log_header $gemm_reduce_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_reduce 1 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_reduce 1 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_reduce_log
|
||||
|
||||
#run gemm_streamk tests
|
||||
export gemm_streamk_log="perf_gemm_streamk.log"
|
||||
print_log_header $gemm_streamk_log $env_type $branch $host_name
|
||||
./profile_gemm.sh gemm_streamk 0 0 $verify 1 0 1 2>&1 | tee -a $gemm_streamk_log
|
||||
./profile_gemm.sh gemm_streamk 1 0 $verify 1 0 1 2>&1 | tee -a $gemm_streamk_log
|
||||
./profile_gemm.sh gemm_streamk 2 0 $verify 1 0 1 2>&1 | tee -a $gemm_streamk_log
|
||||
./profile_gemm.sh gemm_streamk 3 0 $verify 1 0 1 2>&1 | tee -a $gemm_streamk_log
|
||||
./profile_gemm.sh gemm_streamk 0 1 $verify 1 0 1 2>&1 | tee -a $gemm_streamk_log
|
||||
./profile_gemm.sh gemm_streamk 1 1 $verify 1 0 1 2>&1 | tee -a $gemm_streamk_log
|
||||
./profile_gemm.sh gemm_streamk 2 1 $verify 1 0 1 2>&1 | tee -a $gemm_streamk_log
|
||||
./profile_gemm.sh gemm_streamk 3 1 $verify 1 0 1 2>&1 | tee -a $gemm_streamk_log
|
||||
|
||||
#run gemm_universal_batched tests
|
||||
export gemm_universal_batched_log="perf_gemm_universal_batched.log"
|
||||
print_log_header $gemm_universal_batched_log $env_type $branch $host_name
|
||||
./profile_gemm_universal_batched.sh gemm_universal_batched 0 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_batched_log
|
||||
./profile_gemm_universal_batched.sh gemm_universal_batched 1 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_batched_log
|
||||
./profile_gemm_universal_batched.sh gemm_universal_batched 0 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_batched_log
|
||||
./profile_gemm_universal_batched.sh gemm_universal_batched 1 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_batched_log
|
||||
|
||||
#run gemm_universal_reduce tests
|
||||
export gemm_universal_reduce_log="perf_gemm_universal_reduce.log"
|
||||
print_log_header $gemm_universal_reduce_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 0 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 1 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 2 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 3 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 4 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 5 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 6 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 0 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 1 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 2 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 3 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 4 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 5 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
./profile_splitK_gemm.sh gemm_universal_reduce 6 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_reduce_log
|
||||
|
||||
#run gemm_universal_streamk tests
|
||||
export gemm_universal_streamk_log="perf_gemm_universal_streamk.log"
|
||||
print_log_header $gemm_universal_streamk_log $env_type $branch $host_name
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 0 0 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 1 0 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 2 0 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 3 0 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 4 0 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 5 0 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 6 0 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 0 1 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 1 1 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 2 1 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 3 1 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 4 1 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 5 1 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 6 1 $verify 1 0 1 0 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 0 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 1 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 2 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 3 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 4 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 5 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 6 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 0 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 1 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 2 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 3 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 4 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 5 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 6 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 0 0 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 1 0 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 2 0 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 3 0 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 4 0 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 5 0 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 6 0 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 0 1 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 1 1 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 2 1 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 3 1 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 4 1 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 5 1 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
./profile_gemm_universal_streamk.sh gemm_universal_streamk 6 1 $verify 1 0 1 2 2>&1 | tee -a $gemm_universal_streamk_log
|
||||
|
||||
#run gemm_universal tests
|
||||
export gemm_universal_log="perf_gemm_universal.log"
|
||||
print_log_header $gemm_universal_log $env_type $branch $host_name
|
||||
./profile_splitK_gemm.sh gemm_universal 0 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 1 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 2 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 3 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 4 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 5 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 6 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 7 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 8 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 9 0 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
|
||||
./profile_splitK_gemm.sh gemm_universal 0 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 1 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 2 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 3 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 4 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 5 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 6 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 7 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 8 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
./profile_splitK_gemm.sh gemm_universal 9 1 $verify 1 0 1 1 2>&1 | tee -a $gemm_universal_log
|
||||
|
||||
#run grouped_conv_fwd_outelementop tests
|
||||
export grouped_conv_fwd_outelementop_log="perf_grouped_conv_fwd_outelementop.log"
|
||||
print_log_header $grouped_conv_fwd_outelementop_log $env_type $branch $host_name
|
||||
./profile_grouped_conv_fwd_outelementop.sh grouped_conv_fwd_outelementop 0 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_outelementop_log
|
||||
./profile_grouped_conv_fwd_outelementop.sh grouped_conv_fwd_outelementop 1 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_outelementop_log
|
||||
./profile_grouped_conv_fwd_outelementop.sh grouped_conv_fwd_outelementop 2 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_outelementop_log
|
||||
./profile_grouped_conv_fwd_outelementop.sh grouped_conv_fwd_outelementop 3 0 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_outelementop_log
|
||||
./profile_grouped_conv_fwd_outelementop.sh grouped_conv_fwd_outelementop 0 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_outelementop_log
|
||||
./profile_grouped_conv_fwd_outelementop.sh grouped_conv_fwd_outelementop 1 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_outelementop_log
|
||||
./profile_grouped_conv_fwd_outelementop.sh grouped_conv_fwd_outelementop 2 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_outelementop_log
|
||||
./profile_grouped_conv_fwd_outelementop.sh grouped_conv_fwd_outelementop 3 1 1 $verify 1 0 1 256 2>&1 | tee -a $grouped_conv_fwd_outelementop_log
|
||||
|
||||
#run grouped_gemm_fastgelu tests
|
||||
export grouped_gemm_fastgelu_log="perf_grouped_gemm_fastgelu.log"
|
||||
print_log_header $grouped_gemm_fastgelu_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm_fastgelu 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fastgelu_log
|
||||
./profile_grouped_gemm.sh grouped_gemm_fastgelu 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fastgelu_log
|
||||
|
||||
#run grouped_gemm_fixed_nk tests
|
||||
export grouped_gemm_fixed_nk_log="perf_grouped_gemm_fixed_nk.log"
|
||||
print_log_header $grouped_gemm_fixed_nk_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm_fixed_nk.sh grouped_gemm_fixed_nk 0 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fixed_nk_log
|
||||
./profile_grouped_gemm_fixed_nk.sh grouped_gemm_fixed_nk 1 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fixed_nk_log
|
||||
./profile_grouped_gemm_fixed_nk.sh grouped_gemm_fixed_nk 2 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fixed_nk_log
|
||||
./profile_grouped_gemm_fixed_nk.sh grouped_gemm_fixed_nk 3 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fixed_nk_log
|
||||
./profile_grouped_gemm_fixed_nk.sh grouped_gemm_fixed_nk 0 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fixed_nk_log
|
||||
./profile_grouped_gemm_fixed_nk.sh grouped_gemm_fixed_nk 1 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fixed_nk_log
|
||||
./profile_grouped_gemm_fixed_nk.sh grouped_gemm_fixed_nk 2 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fixed_nk_log
|
||||
./profile_grouped_gemm_fixed_nk.sh grouped_gemm_fixed_nk 3 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_fixed_nk_log
|
||||
|
||||
#run grouped_gemm_multiply_tile_loop tests
|
||||
export grouped_gemm_multiply_tile_loop_log="perf_grouped_gemm_multiply_tile_loop.log"
|
||||
print_log_header $grouped_gemm_multiply_tile_loop_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm_multiply_tile_loop 0 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_multiply_tile_loop_log
|
||||
|
||||
#run grouped_gemm_tile_loop tests
|
||||
export grouped_gemm_tile_loop_log="perf_grouped_gemm_tile_loop.log"
|
||||
print_log_header $grouped_gemm_tile_loop_log $env_type $branch $host_name
|
||||
./profile_grouped_gemm.sh grouped_gemm_tile_loop 0 0 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_tile_loop_log
|
||||
./profile_grouped_gemm.sh grouped_gemm_tile_loop 0 1 $verify 1 0 1 2>&1 | tee -a $grouped_gemm_tile_loop_log
|
||||
|
||||
#run groupnorm tests
|
||||
export groupnorm_log="perf_groupnorm.log"
|
||||
print_log_header $groupnorm_log $env_type $branch $host_name
|
||||
./profile_groupnorm.sh groupnorm 0 $verify 1 0 1 2>&1 | tee -a $groupnorm_log
|
||||
./profile_groupnorm.sh groupnorm 1 $verify 1 0 1 2>&1 | tee -a $groupnorm_log
|
||||
|
||||
#run permute_scale tests
|
||||
export permute_scale_log="perf_permute_scale.log"
|
||||
print_log_header $permute_scale_log $env_type $branch $host_name
|
||||
./profile_permute_scale.sh permute_scale 0 $verify 1 0 1 2>&1 | tee -a $permute_scale_log
|
||||
./profile_permute_scale.sh permute_scale 1 $verify 1 0 1 2>&1 | tee -a $permute_scale_log
|
||||
|
||||
#run transpose tests
|
||||
export transpose_log="perf_transpose.log"
|
||||
print_log_header $transpose_log $env_type $branch $host_name
|
||||
./profile_transpose.sh transpose 0 $verify 1 0 1 2>&1 | tee -a $transpose_log
|
||||
./profile_transpose.sh transpose 1 $verify 1 0 1 2>&1 | tee -a $transpose_log
|
||||
|
||||
#run avg_pool2d_bwd tests
# Positional arguments, as read by profile_avg_pool2d_bwd.sh:
#   $1 OP = avg_pool2d_bwd, $2 DATATYPE = 0/1/3/5/7 (one run per code),
#   $3 VERIFY = $verify, $4 INIT = 1, $5 LOG = 0, $6 TIME = 1.
# NOTE(review): what each DATATYPE code stands for is decided by ckProfiler
# and is not visible here -- confirm before adding or removing codes.
# Every run merges stderr into stdout (2>&1) and appends the combined output
# to $avg_pool2d_bwd_log through `tee -a`, so it is also echoed to the console.
# print_log_header is defined outside this section.
|
||||
export avg_pool2d_bwd_log="perf_avg_pool2d_bwd.log"
|
||||
print_log_header $avg_pool2d_bwd_log $env_type $branch $host_name
|
||||
./profile_avg_pool2d_bwd.sh avg_pool2d_bwd 0 $verify 1 0 1 2>&1 | tee -a $avg_pool2d_bwd_log
|
||||
./profile_avg_pool2d_bwd.sh avg_pool2d_bwd 1 $verify 1 0 1 2>&1 | tee -a $avg_pool2d_bwd_log
|
||||
./profile_avg_pool2d_bwd.sh avg_pool2d_bwd 3 $verify 1 0 1 2>&1 | tee -a $avg_pool2d_bwd_log
|
||||
./profile_avg_pool2d_bwd.sh avg_pool2d_bwd 5 $verify 1 0 1 2>&1 | tee -a $avg_pool2d_bwd_log
|
||||
./profile_avg_pool2d_bwd.sh avg_pool2d_bwd 7 $verify 1 0 1 2>&1 | tee -a $avg_pool2d_bwd_log
|
||||
|
||||
#run avg_pool3d_bwd tests
# Positional arguments, as read by profile_avg_pool3d_bwd.sh:
#   $1 OP = avg_pool3d_bwd, $2 DATATYPE = 0/1/5 (one run per code),
#   $3 VERIFY = $verify, $4 INIT = 1, $5 LOG = 0, $6 TIME = 1.
# NOTE(review): the 2D variant in this file also runs DATATYPE codes 3 and 7;
# only 0, 1 and 5 are exercised here -- confirm that the narrower set is
# intentional for the 3D op.
# Every run merges stderr into stdout (2>&1) and appends the combined output
# to $avg_pool3d_bwd_log through `tee -a`.
|
||||
export avg_pool3d_bwd_log="perf_avg_pool3d_bwd.log"
|
||||
print_log_header $avg_pool3d_bwd_log $env_type $branch $host_name
|
||||
./profile_avg_pool3d_bwd.sh avg_pool3d_bwd 0 $verify 1 0 1 2>&1 | tee -a $avg_pool3d_bwd_log
|
||||
./profile_avg_pool3d_bwd.sh avg_pool3d_bwd 1 $verify 1 0 1 2>&1 | tee -a $avg_pool3d_bwd_log
|
||||
./profile_avg_pool3d_bwd.sh avg_pool3d_bwd 5 $verify 1 0 1 2>&1 | tee -a $avg_pool3d_bwd_log
|
||||
|
||||
#run bnorm_bwd tests
# Runs profile_bnorm.sh once per second-argument value 0, 1, 5 and 6, with
# stderr merged into stdout and the output appended to $bnorm_bwd_log.
# NOTE(review): the three values after $verify are `0 1 0` here, whereas most
# other sections of this script pass `1 0 1`. How profile_bnorm.sh interprets
# these positions is not visible from this file -- confirm the ordering is
# intended (in particular that kernel timing is actually enabled for a
# performance run).
|
||||
export bnorm_bwd_log="perf_bnorm_bwd.log"
|
||||
print_log_header $bnorm_bwd_log $env_type $branch $host_name
|
||||
./profile_bnorm.sh bnorm_bwd 0 $verify 0 1 0 2>&1 | tee -a $bnorm_bwd_log
|
||||
./profile_bnorm.sh bnorm_bwd 1 $verify 0 1 0 2>&1 | tee -a $bnorm_bwd_log
|
||||
./profile_bnorm.sh bnorm_bwd 5 $verify 0 1 0 2>&1 | tee -a $bnorm_bwd_log
|
||||
./profile_bnorm.sh bnorm_bwd 6 $verify 0 1 0 2>&1 | tee -a $bnorm_bwd_log
|
||||
|
||||
#run bnorm_fwd tests
|
||||
export bnorm_fwd_log="perf_bnorm_fwd.log"
|
||||
print_log_header $bnorm_fwd_log $env_type $branch $host_name
|
||||
./profile_bnorm_fwd.sh bnorm_fwd 0 $verify 0 1 0 2>&1 | tee -a $bnorm_fwd_log
|
||||
./profile_bnorm_fwd.sh bnorm_fwd 1 $verify 0 1 0 2>&1 | tee -a $bnorm_fwd_log
|
||||
./profile_bnorm_fwd.sh bnorm_fwd 5 $verify 0 1 0 2>&1 | tee -a $bnorm_fwd_log
|
||||
./profile_bnorm_fwd.sh bnorm_fwd 6 $verify 0 1 0 2>&1 | tee -a $bnorm_fwd_log
|
||||
|
||||
#run bnorm_infer tests
|
||||
export bnorm_infer_log="perf_bnorm_infer.log"
|
||||
print_log_header $bnorm_infer_log $env_type $branch $host_name
|
||||
./profile_bnorm.sh bnorm_infer 0 $verify 0 1 0 2>&1 | tee -a $bnorm_infer_log
|
||||
./profile_bnorm.sh bnorm_infer 1 $verify 0 1 0 2>&1 | tee -a $bnorm_infer_log
|
||||
./profile_bnorm.sh bnorm_infer 5 $verify 0 1 0 2>&1 | tee -a $bnorm_infer_log
|
||||
./profile_bnorm.sh bnorm_infer 6 $verify 0 1 0 2>&1 | tee -a $bnorm_infer_log
|
||||
|
||||
#run groupnorm_bwd_data tests
|
||||
export groupnorm_bwd_data_log="perf_groupnorm_bwd_data.log"
|
||||
print_log_header $groupnorm_bwd_data_log $env_type $branch $host_name
|
||||
./profile_groupnorm.sh groupnorm_bwd_data 1 $verify 1 0 1 2>&1 | tee -a $groupnorm_bwd_data_log
|
||||
|
||||
#run groupnorm_bwd_gamma_beta tests
|
||||
export groupnorm_bwd_gamma_beta_log="perf_groupnorm_bwd_gamma_beta.log"
|
||||
print_log_header $groupnorm_bwd_gamma_beta_log $env_type $branch $host_name
|
||||
./profile_groupnorm.sh groupnorm_bwd_gamma_beta 0 $verify 1 0 1 2>&1 | tee -a $groupnorm_bwd_gamma_beta_log
|
||||
./profile_groupnorm.sh groupnorm_bwd_gamma_beta 1 $verify 1 0 1 2>&1 | tee -a $groupnorm_bwd_gamma_beta_log
|
||||
|
||||
#run layernorm_bwd_data tests
|
||||
export layernorm_bwd_data_log="perf_layernorm_bwd_data.log"
|
||||
print_log_header $layernorm_bwd_data_log $env_type $branch $host_name
|
||||
./profile_layernorm.sh layernorm_bwd_data 0 $verify 1 0 1 2>&1 | tee -a $layernorm_bwd_data_log
|
||||
./profile_layernorm.sh layernorm_bwd_data 1 $verify 1 0 1 2>&1 | tee -a $layernorm_bwd_data_log
|
||||
|
||||
#run layernorm_bwd_gamma_beta tests
|
||||
export layernorm_bwd_gamma_beta_log="perf_layernorm_bwd_gamma_beta.log"
|
||||
print_log_header $layernorm_bwd_gamma_beta_log $env_type $branch $host_name
|
||||
./profile_layernorm.sh layernorm_bwd_gamma_beta 0 $verify 1 0 1 2>&1 | tee -a $layernorm_bwd_gamma_beta_log
|
||||
./profile_layernorm.sh layernorm_bwd_gamma_beta 1 $verify 1 0 1 2>&1 | tee -a $layernorm_bwd_gamma_beta_log
|
||||
|
||||
#run layernorm_fwd tests
|
||||
export layernorm_fwd_log="perf_layernorm_fwd.log"
|
||||
print_log_header $layernorm_fwd_log $env_type $branch $host_name
|
||||
./profile_layernorm.sh layernorm_fwd 0 $verify 1 0 1 2>&1 | tee -a $layernorm_fwd_log
|
||||
./profile_layernorm.sh layernorm_fwd 1 $verify 1 0 1 2>&1 | tee -a $layernorm_fwd_log
|
||||
|
||||
#run max_pool2d_bwd tests
|
||||
export max_pool2d_bwd_log="perf_max_pool2d_bwd.log"
|
||||
print_log_header $max_pool2d_bwd_log $env_type $branch $host_name
|
||||
./profile_max_pool2d_bwd.sh max_pool2d_bwd 0 $verify 1 0 1 2>&1 | tee -a $max_pool2d_bwd_log
|
||||
./profile_max_pool2d_bwd.sh max_pool2d_bwd 1 $verify 1 0 1 2>&1 | tee -a $max_pool2d_bwd_log
|
||||
./profile_max_pool2d_bwd.sh max_pool2d_bwd 3 $verify 1 0 1 2>&1 | tee -a $max_pool2d_bwd_log
|
||||
./profile_max_pool2d_bwd.sh max_pool2d_bwd 5 $verify 1 0 1 2>&1 | tee -a $max_pool2d_bwd_log
|
||||
|
||||
#run max_pool2d_fwd tests
|
||||
export max_pool2d_fwd_log="perf_max_pool2d_fwd.log"
|
||||
print_log_header $max_pool2d_fwd_log $env_type $branch $host_name
|
||||
./profile_max_pool2d_fwd.sh max_pool2d_fwd 0 $verify 1 0 1 2>&1 | tee -a $max_pool2d_fwd_log
|
||||
./profile_max_pool2d_fwd.sh max_pool2d_fwd 1 $verify 1 0 1 2>&1 | tee -a $max_pool2d_fwd_log
|
||||
./profile_max_pool2d_fwd.sh max_pool2d_fwd 2 $verify 1 0 1 2>&1 | tee -a $max_pool2d_fwd_log
|
||||
./profile_max_pool2d_fwd.sh max_pool2d_fwd 3 $verify 1 0 1 2>&1 | tee -a $max_pool2d_fwd_log
|
||||
./profile_max_pool2d_fwd.sh max_pool2d_fwd 4 $verify 1 0 1 2>&1 | tee -a $max_pool2d_fwd_log
|
||||
|
||||
#run max_pool3d_bwd tests
|
||||
export max_pool3d_bwd_log="perf_max_pool3d_bwd.log"
|
||||
print_log_header $max_pool3d_bwd_log $env_type $branch $host_name
|
||||
./profile_max_pool3d.sh max_pool3d_bwd 0 $verify 1 0 1 2>&1 | tee -a $max_pool3d_bwd_log
|
||||
./profile_max_pool3d.sh max_pool3d_bwd 1 $verify 1 0 1 2>&1 | tee -a $max_pool3d_bwd_log
|
||||
./profile_max_pool3d.sh max_pool3d_bwd 5 $verify 1 0 1 2>&1 | tee -a $max_pool3d_bwd_log
|
||||
|
||||
#run pool3d_fwd tests
# Ten runs of profile_pool3d_fwd.sh: second-argument values 0, 1, 3, 5 and 7,
# first with a trailing 0 and then again with a trailing 1.
# Unlike the other pooling sections, each call passes one extra positional
# argument after the usual `$verify 1 0 1` group.
# NOTE(review): the meaning of that trailing 0/1 flag is defined by
# profile_pool3d_fwd.sh / ckProfiler and is not visible here -- confirm
# against that script before changing it.
# All output (stdout + stderr) is appended to $pool3d_fwd_log via `tee -a`.
|
||||
export pool3d_fwd_log="perf_pool3d_fwd.log"
|
||||
print_log_header $pool3d_fwd_log $env_type $branch $host_name
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 0 $verify 1 0 1 0 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 1 $verify 1 0 1 0 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 3 $verify 1 0 1 0 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 5 $verify 1 0 1 0 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 7 $verify 1 0 1 0 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 0 $verify 1 0 1 1 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 1 $verify 1 0 1 1 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 3 $verify 1 0 1 1 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 5 $verify 1 0 1 1 2>&1 | tee -a $pool3d_fwd_log
|
||||
./profile_pool3d_fwd.sh pool3d_fwd 7 $verify 1 0 1 1 2>&1 | tee -a $pool3d_fwd_log
|
||||
|
||||
#run softmax tests
|
||||
export softmax_log="perf_softmax.log"
|
||||
print_log_header $softmax_log $env_type $branch $host_name
|
||||
./profile_softmax.sh softmax 0 $verify 1 0 1 2>&1 | tee -a $softmax_log
|
||||
./profile_softmax.sh softmax 1 $verify 1 0 1 2>&1 | tee -a $softmax_log
|
||||
./profile_softmax.sh softmax 2 $verify 1 0 1 2>&1 | tee -a $softmax_log
|
||||
./profile_softmax.sh softmax 3 $verify 1 0 1 2>&1 | tee -a $softmax_log
|
||||
fi
|
||||
Reference in New Issue
Block a user