mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 01:36:06 +00:00
Add host API (#220)
* Add host API * manually rebase on develop * clean * manually rebase on develop * exclude tests from all target * address review comments * update client app name * fix missing lib name * clang-format update * refactor * refactor * refactor * refactor * refactor * fix test issue * refactor * refactor * refactor * update cmake and readme Co-authored-by: Chao Liu <chao.liu2@amd.com>
This commit is contained in:
@@ -48,8 +48,8 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
printf(" 3: A[g, k, m] * B[g, n, k] = C[g, m, n])\n");
|
||||
printf("arg4: verification (0: no; 1: yes)\n");
|
||||
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: run kernel # of times (>1)\n");
|
||||
printf("arg6: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n");
|
||||
exit(1);
|
||||
}
|
||||
@@ -59,7 +59,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[4]);
|
||||
const int init_method = std::stoi(argv[5]);
|
||||
const bool do_log = std::stoi(argv[6]);
|
||||
const int nrepeat = std::stoi(argv[7]);
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
|
||||
const int M = std::stoi(argv[8]);
|
||||
const int N = std::stoi(argv[9]);
|
||||
@@ -82,7 +82,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -102,7 +102,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -122,7 +122,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -142,7 +142,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -162,7 +162,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -182,7 +182,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -202,7 +202,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -222,7 +222,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -242,7 +242,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -262,7 +262,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -282,7 +282,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -302,7 +302,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -322,7 +322,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -342,7 +342,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -362,7 +362,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -382,7 +382,7 @@ int profile_batched_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
|
||||
@@ -33,8 +33,8 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
|
||||
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
|
||||
printf("arg4: verification (0: no; 1: yes)\n");
|
||||
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: run kernel # of times (>1)\n");
|
||||
printf("arg6: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, BatchCount\n");
|
||||
printf("arg15: split k into mulitiple batch\n");
|
||||
exit(1);
|
||||
@@ -45,7 +45,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[4]);
|
||||
const int init_method = std::stoi(argv[5]);
|
||||
const bool do_log = std::stoi(argv[6]);
|
||||
const int nrepeat = std::stoi(argv[7]);
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
|
||||
const int M = std::stoi(argv[8]);
|
||||
const int N = std::stoi(argv[9]);
|
||||
@@ -69,7 +69,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -91,7 +91,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -113,7 +113,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -135,7 +135,7 @@ int profile_batched_gemm_reduce(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
|
||||
@@ -44,7 +44,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
|
||||
printf("arg6: verification (0: no; 1: yes)\n");
|
||||
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg9: run kernel # of times (>1)\n");
|
||||
printf("arg9: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
exit(1);
|
||||
@@ -57,7 +57,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[6]);
|
||||
const int init_method = std::stoi(argv[7]);
|
||||
const bool do_log = std::stoi(argv[8]);
|
||||
const int nrepeat = std::stoi(argv[9]);
|
||||
const bool time_kernel = std::stoi(argv[9]);
|
||||
|
||||
const ck::index_t N = std::stoi(argv[10]);
|
||||
const ck::index_t K = std::stoi(argv[11]);
|
||||
@@ -96,7 +96,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
StreamControl{nullptr, time_kernel},
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
@@ -122,7 +122,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
StreamControl{nullptr, time_kernel},
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
@@ -148,7 +148,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
StreamControl{nullptr, time_kernel},
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
@@ -174,7 +174,7 @@ int profile_conv_bwd_data(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
StreamControl{nullptr, time_kernel},
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
|
||||
@@ -58,7 +58,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[6]);
|
||||
const int init_method = std::stoi(argv[7]);
|
||||
const bool do_log = std::stoi(argv[8]);
|
||||
const int nrepeat = std::stoi(argv[9]);
|
||||
const bool time_kernel = std::stoi(argv[9]);
|
||||
|
||||
const ck::index_t N = std::stoi(argv[10]);
|
||||
const ck::index_t K = std::stoi(argv[11]);
|
||||
@@ -98,7 +98,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
@@ -124,7 +124,7 @@ int profile_conv_bwd_weight(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
|
||||
@@ -42,7 +42,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
|
||||
printf("arg6: verification (0: no; 1: yes)\n");
|
||||
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg9: run kernel # of times (>1)\n");
|
||||
printf("arg9: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
exit(1);
|
||||
@@ -55,7 +55,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[6]);
|
||||
const int init_method = std::stoi(argv[7]);
|
||||
const bool do_log = std::stoi(argv[8]);
|
||||
const int nrepeat = std::stoi(argv[9]);
|
||||
const bool time_kernel = std::stoi(argv[9]);
|
||||
|
||||
const ck::index_t N = std::stoi(argv[10]);
|
||||
const ck::index_t K = std::stoi(argv[11]);
|
||||
@@ -93,7 +93,7 @@ int profile_conv_fwd_bias_relu(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
|
||||
@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
|
||||
printf("arg6: verification (0: no; 1: yes)\n");
|
||||
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg9: run kernel # of times (>1)\n");
|
||||
printf("arg9: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
exit(1);
|
||||
@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[6]);
|
||||
const int init_method = std::stoi(argv[7]);
|
||||
const bool do_log = std::stoi(argv[8]);
|
||||
const int nrepeat = std::stoi(argv[9]);
|
||||
const bool time_kernel = std::stoi(argv[9]);
|
||||
|
||||
const ck::index_t N = std::stoi(argv[10]);
|
||||
const ck::index_t K = std::stoi(argv[11]);
|
||||
@@ -94,7 +94,7 @@ int profile_conv_fwd_bias_relu_add(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
|
||||
@@ -43,7 +43,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
|
||||
printf("arg6: verification (0: no; 1: yes)\n");
|
||||
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg9: run kernel # of times (>1)\n");
|
||||
printf("arg9: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
exit(1);
|
||||
@@ -56,7 +56,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[6]);
|
||||
const int init_method = std::stoi(argv[7]);
|
||||
const bool do_log = std::stoi(argv[8]);
|
||||
const int nrepeat = std::stoi(argv[9]);
|
||||
const bool time_kernel = std::stoi(argv[9]);
|
||||
|
||||
const ck::index_t N = std::stoi(argv[10]);
|
||||
const ck::index_t K = std::stoi(argv[11]);
|
||||
@@ -95,7 +95,7 @@ int profile_conv_fwd_bias_relu_atomic_add(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
|
||||
@@ -95,7 +95,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
|
||||
printf("arg6: verification (0: no; 1: yes)\n");
|
||||
printf("arg7: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg9: run kernel # of times (>1)\n");
|
||||
printf("arg9: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
return 1;
|
||||
@@ -108,7 +108,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
|
||||
const bool do_verification = std::stoi(argv[6]);
|
||||
const int init_method = std::stoi(argv[7]);
|
||||
const bool do_log = std::stoi(argv[8]);
|
||||
const int nrepeat = std::stoi(argv[9]);
|
||||
const bool time_kernel = std::stoi(argv[9]);
|
||||
|
||||
ck::utils::conv::ConvParams params = parse_conv_params(num_dim_spatial, argv, preParams);
|
||||
|
||||
@@ -132,7 +132,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
params.N_,
|
||||
params.K_,
|
||||
params.C_,
|
||||
@@ -157,7 +157,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
params.N_,
|
||||
params.K_,
|
||||
params.C_,
|
||||
@@ -182,7 +182,7 @@ int profile_convnd_bwd_data(int argc, char* argv[], int num_dim_spatial)
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
params.N_,
|
||||
params.K_,
|
||||
params.C_,
|
||||
|
||||
@@ -119,7 +119,7 @@ template <int NDim,
|
||||
void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
|
||||
bool do_verification,
|
||||
bool do_log,
|
||||
int nrepeat,
|
||||
bool time_kernel,
|
||||
int init_method,
|
||||
ConvLayouts)
|
||||
{
|
||||
@@ -185,7 +185,7 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
|
||||
reference_conv_fwd_fun);
|
||||
auto best_conf = run_engine.Profile(
|
||||
conv::ConvolutionFwdInstances<InDataType, WeiDataType, OutDataType>::template Get<NDim>(),
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
do_verification,
|
||||
do_log);
|
||||
|
||||
@@ -201,7 +201,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
const ck::utils::conv::ConvParams& params,
|
||||
bool do_verification,
|
||||
bool do_log,
|
||||
int nrepeat,
|
||||
bool time_kernel,
|
||||
int init_method)
|
||||
{
|
||||
switch(data_layout)
|
||||
@@ -214,7 +214,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
params,
|
||||
do_verification,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
init_method,
|
||||
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
|
||||
break;
|
||||
@@ -223,7 +223,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
params,
|
||||
do_verification,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
init_method,
|
||||
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
|
||||
break;
|
||||
@@ -232,7 +232,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
params,
|
||||
do_verification,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
init_method,
|
||||
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
|
||||
break;
|
||||
@@ -241,7 +241,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
params,
|
||||
do_verification,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
init_method,
|
||||
ConvolutionLayouts<NDim, ConvDataLayout::NHWC>{});
|
||||
break;
|
||||
@@ -256,7 +256,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
params,
|
||||
do_verification,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
init_method,
|
||||
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
|
||||
break;
|
||||
@@ -265,7 +265,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
params,
|
||||
do_verification,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
init_method,
|
||||
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
|
||||
break;
|
||||
@@ -274,7 +274,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
params,
|
||||
do_verification,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
init_method,
|
||||
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
|
||||
break;
|
||||
@@ -283,7 +283,7 @@ void profile_convnd_instances(ConvDataType data_type,
|
||||
params,
|
||||
do_verification,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
init_method,
|
||||
ConvolutionLayouts<NDim, ConvDataLayout::NCHW>{});
|
||||
break;
|
||||
@@ -304,7 +304,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
|
||||
bool do_verification{true};
|
||||
int init_method{2};
|
||||
bool do_log{false};
|
||||
int nrepeat{100};
|
||||
bool time_kernel{false};
|
||||
int num_dim_spatial{2};
|
||||
ConvParams params;
|
||||
|
||||
@@ -318,7 +318,7 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
|
||||
do_verification = std::stoi(argv[4]);
|
||||
init_method = std::stoi(argv[5]);
|
||||
do_log = std::stoi(argv[6]);
|
||||
nrepeat = std::stoi(argv[7]);
|
||||
time_kernel = std::stoi(argv[7]);
|
||||
num_dim_spatial = std::stoi(argv[8]);
|
||||
}
|
||||
if(argc >= 10)
|
||||
@@ -332,15 +332,15 @@ int ck::profiler::profile_convnd_fwd(int argc, char* argv[])
|
||||
{
|
||||
case 1:
|
||||
profile_convnd_instances<1>(
|
||||
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
|
||||
data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
|
||||
break;
|
||||
case 2:
|
||||
profile_convnd_instances<2>(
|
||||
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
|
||||
data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
|
||||
break;
|
||||
case 3:
|
||||
profile_convnd_instances<3>(
|
||||
data_type, data_layout, params, do_verification, do_log, nrepeat, init_method);
|
||||
data_type, data_layout, params, do_verification, do_log, time_kernel, init_method);
|
||||
break;
|
||||
default:
|
||||
throw std::runtime_error("profile_conv_fwd: unsupported num_dim_spatial value: " +
|
||||
|
||||
@@ -38,8 +38,8 @@ int profile_gemm(int argc, char* argv[])
|
||||
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
|
||||
printf("arg4: verification (0: no; 1: yes)\n");
|
||||
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: run kernel # of times (>1)\n");
|
||||
printf("arg6: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
|
||||
printf("arg14: split k into mulitiple batch\n");
|
||||
exit(1);
|
||||
@@ -50,7 +50,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[4]);
|
||||
const int init_method = std::stoi(argv[5]);
|
||||
const bool do_log = std::stoi(argv[6]);
|
||||
const int nrepeat = std::stoi(argv[7]);
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
|
||||
const int M = std::stoi(argv[8]);
|
||||
const int N = std::stoi(argv[9]);
|
||||
@@ -74,7 +74,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -94,7 +94,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -114,7 +114,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -134,7 +134,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -154,7 +154,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -174,7 +174,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -194,7 +194,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -214,7 +214,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -234,7 +234,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -254,7 +254,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -274,7 +274,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -294,7 +294,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -314,7 +314,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -334,7 +334,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -354,7 +354,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -374,7 +374,7 @@ int profile_gemm(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
|
||||
@@ -36,8 +36,8 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
|
||||
printf("arg4: verification (0: no; 1: yes)\n");
|
||||
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: run kernel # of times (>1)\n");
|
||||
printf("arg6: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
|
||||
printf("arg14: alpha\n");
|
||||
printf("arg15: beta\n");
|
||||
@@ -50,7 +50,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[4]);
|
||||
const int init_method = std::stoi(argv[5]);
|
||||
const bool do_log = std::stoi(argv[6]);
|
||||
const int nrepeat = std::stoi(argv[7]);
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
|
||||
const int M = std::stoi(argv[8]);
|
||||
const int N = std::stoi(argv[9]);
|
||||
@@ -76,7 +76,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -99,7 +99,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -122,7 +122,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -145,7 +145,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -168,7 +168,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -191,7 +191,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -214,7 +214,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -237,7 +237,7 @@ int profile_gemm_bias_2d(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
|
||||
@@ -36,8 +36,8 @@ int profile_gemm_bias_relu(int argc, char* argv[])
|
||||
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
|
||||
printf("arg4: verification (0: no; 1: yes)\n");
|
||||
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: run kernel # of times (>1)\n");
|
||||
printf("arg6: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
|
||||
printf("arg14: split k into mulitiple batch\n");
|
||||
exit(1);
|
||||
@@ -48,7 +48,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[4]);
|
||||
const int init_method = std::stoi(argv[5]);
|
||||
const bool do_log = std::stoi(argv[6]);
|
||||
const int nrepeat = std::stoi(argv[7]);
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
|
||||
const int M = std::stoi(argv[8]);
|
||||
const int N = std::stoi(argv[9]);
|
||||
@@ -69,7 +69,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -88,7 +88,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -107,7 +107,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -126,7 +126,7 @@ int profile_gemm_bias_relu(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
|
||||
@@ -36,8 +36,8 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
|
||||
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
|
||||
printf("arg4: verification (0: no; 1: yes)\n");
|
||||
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: run kernel # of times (>1)\n");
|
||||
printf("arg6: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg8 to 14: M, N, K, StrideA, StrideB, StrideC, StrideC1\n");
|
||||
printf("arg15: split k into mulitiple batch\n");
|
||||
exit(1);
|
||||
@@ -48,7 +48,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[4]);
|
||||
const int init_method = std::stoi(argv[5]);
|
||||
const bool do_log = std::stoi(argv[6]);
|
||||
const int nrepeat = std::stoi(argv[7]);
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
|
||||
const int M = std::stoi(argv[8]);
|
||||
const int N = std::stoi(argv[9]);
|
||||
@@ -70,7 +70,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -90,7 +90,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -110,7 +110,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -130,7 +130,7 @@ int profile_gemm_bias_relu_add(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
|
||||
@@ -32,8 +32,8 @@ int profile_gemm_reduce(int argc, char* argv[])
|
||||
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
|
||||
printf("arg4: verification (0: no; 1: yes)\n");
|
||||
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: run kernel # of times (>1)\n");
|
||||
printf("arg6: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg8 to 13: M, N, K, StrideA, StrideB, StrideC\n");
|
||||
printf("arg14: split k into mulitiple batch\n");
|
||||
exit(1);
|
||||
@@ -44,7 +44,7 @@ int profile_gemm_reduce(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[4]);
|
||||
const int init_method = std::stoi(argv[5]);
|
||||
const bool do_log = std::stoi(argv[6]);
|
||||
const int nrepeat = std::stoi(argv[7]);
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
|
||||
const int M = std::stoi(argv[8]);
|
||||
const int N = std::stoi(argv[9]);
|
||||
@@ -66,7 +66,7 @@ int profile_gemm_reduce(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -87,7 +87,7 @@ int profile_gemm_reduce(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -108,7 +108,7 @@ int profile_gemm_reduce(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
@@ -129,7 +129,7 @@ int profile_gemm_reduce(int argc, char* argv[])
|
||||
do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
M,
|
||||
N,
|
||||
K,
|
||||
|
||||
@@ -54,8 +54,8 @@ int profile_grouped_gemm(int argc, char* argv[])
|
||||
printf(" 3: A[k, m] * B[n, k] = C[m, n])\n");
|
||||
printf("arg4: verification (0: no; 1: yes)\n");
|
||||
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
|
||||
printf("arg8: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: run kernel # of times (>1)\n");
|
||||
printf("arg6: print tensor value (0: no; 1: yes)\n");
|
||||
printf("arg7: time kernel (0=n0, 1=yes)\n");
|
||||
printf("arg8 to 13: Ms, Ns, Ks, StrideAs, StrideBs, StrideCs (e.g., 256,256 128,128 64,64 "
|
||||
"64,64 64,64 128,128)\n");
|
||||
exit(1);
|
||||
@@ -66,7 +66,7 @@ int profile_grouped_gemm(int argc, char* argv[])
|
||||
const bool do_verification = std::stoi(argv[4]);
|
||||
const int init_method = std::stoi(argv[5]);
|
||||
const bool do_log = std::stoi(argv[6]);
|
||||
const int nrepeat = std::stoi(argv[7]);
|
||||
const bool time_kernel = std::stoi(argv[7]);
|
||||
|
||||
const auto Ms = argToIntArray(argv[8]);
|
||||
const auto Ns = argToIntArray(argv[9]);
|
||||
@@ -86,7 +86,7 @@ int profile_grouped_gemm(int argc, char* argv[])
|
||||
ck::tensor_layout::gemm::RowMajor>(do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
Ms,
|
||||
Ns,
|
||||
Ks,
|
||||
@@ -104,7 +104,7 @@ int profile_grouped_gemm(int argc, char* argv[])
|
||||
ck::tensor_layout::gemm::RowMajor>(do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
Ms,
|
||||
Ns,
|
||||
Ks,
|
||||
@@ -122,7 +122,7 @@ int profile_grouped_gemm(int argc, char* argv[])
|
||||
ck::tensor_layout::gemm::RowMajor>(do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
Ms,
|
||||
Ns,
|
||||
Ks,
|
||||
@@ -140,7 +140,7 @@ int profile_grouped_gemm(int argc, char* argv[])
|
||||
ck::tensor_layout::gemm::RowMajor>(do_verification,
|
||||
init_method,
|
||||
do_log,
|
||||
nrepeat,
|
||||
time_kernel,
|
||||
Ms,
|
||||
Ns,
|
||||
Ks,
|
||||
|
||||
@@ -144,7 +144,7 @@ class AppArgs
|
||||
bool do_dumpout = false;
|
||||
|
||||
int init_method;
|
||||
int nrepeat;
|
||||
bool time_kernel;
|
||||
|
||||
bool need_indices = false;
|
||||
|
||||
@@ -295,7 +295,7 @@ class AppArgs
|
||||
throw std::runtime_error("Invalid cmd-line arguments, more argumetns are needed!");
|
||||
|
||||
init_method = std::atoi(argv[optind++]);
|
||||
nrepeat = std::atoi(argv[optind]);
|
||||
time_kernel = std::atoi(argv[optind]);
|
||||
|
||||
if(scales.empty())
|
||||
{
|
||||
@@ -354,7 +354,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.init_method,
|
||||
args.do_log,
|
||||
args.do_dumpout,
|
||||
args.nrepeat,
|
||||
args.time_kernel,
|
||||
args.inLengths,
|
||||
args.reduceDims,
|
||||
args.reduceOp,
|
||||
@@ -369,7 +369,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.init_method,
|
||||
args.do_log,
|
||||
args.do_dumpout,
|
||||
args.nrepeat,
|
||||
args.time_kernel,
|
||||
args.inLengths,
|
||||
args.reduceDims,
|
||||
args.reduceOp,
|
||||
@@ -387,7 +387,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.init_method,
|
||||
args.do_log,
|
||||
args.do_dumpout,
|
||||
args.nrepeat,
|
||||
args.time_kernel,
|
||||
args.inLengths,
|
||||
args.reduceDims,
|
||||
args.reduceOp,
|
||||
@@ -414,7 +414,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.init_method,
|
||||
args.do_log,
|
||||
args.do_dumpout,
|
||||
args.nrepeat,
|
||||
args.time_kernel,
|
||||
args.inLengths,
|
||||
args.reduceDims,
|
||||
args.reduceOp,
|
||||
@@ -429,7 +429,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.init_method,
|
||||
args.do_log,
|
||||
args.do_dumpout,
|
||||
args.nrepeat,
|
||||
args.time_kernel,
|
||||
args.inLengths,
|
||||
args.reduceDims,
|
||||
args.reduceOp,
|
||||
@@ -454,7 +454,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.init_method,
|
||||
args.do_log,
|
||||
args.do_dumpout,
|
||||
args.nrepeat,
|
||||
args.time_kernel,
|
||||
args.inLengths,
|
||||
args.reduceDims,
|
||||
args.reduceOp,
|
||||
@@ -471,7 +471,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.init_method,
|
||||
args.do_log,
|
||||
args.do_dumpout,
|
||||
args.nrepeat,
|
||||
args.time_kernel,
|
||||
args.inLengths,
|
||||
args.reduceDims,
|
||||
args.reduceOp,
|
||||
@@ -486,7 +486,7 @@ int profile_reduce(int argc, char* argv[])
|
||||
args.init_method,
|
||||
args.do_log,
|
||||
args.do_dumpout,
|
||||
args.nrepeat,
|
||||
args.time_kernel,
|
||||
args.inLengths,
|
||||
args.reduceDims,
|
||||
args.reduceOp,
|
||||
|
||||
Reference in New Issue
Block a user