Mirror of https://github.com/ROCm/composable_kernel.git
[CK][EXAMPLES] (#2826)
- Added a parameter to enable/disable verification and kernel timing in the examples that were missing it.
- Added a parameter to change the number of groups to execute in the grouped_gemm examples.

Signed-off-by: Michal Kulikowski <Michal.Kulikowski@amd.com>
Commit ffe9775e70 (parent f3239395dc), committed via GitHub.
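Each affected example follows the same pattern: two optional positional arguments select host-side verification and kernel timing, and the timing flag is forwarded to StreamConfig when the invokers run. Below is a minimal, self-contained sketch of that flow; run_kernels() is a hypothetical stand-in for the example's gemmReduce/normalize invoker calls, while the argument handling mirrors the diff that follows.

// Sketch of the CLI plumbing this commit adds to each example.
// run_kernels() is a placeholder, not part of composable_kernel.
#include <cstdio>
#include <cstdlib>
#include <string>

static float run_kernels(bool time_kernel)
{
    // Placeholder: a real example calls
    //   invoker.Run(argument, StreamConfig{nullptr, time_kernel});
    // which returns the averaged kernel time in milliseconds when timing is on.
    return time_kernel ? 0.0f : -1.0f;
}

int main(int argc, char* argv[])
{
    bool do_verification = true;
    bool time_kernel     = false;

    if(argc == 1)
    {
        // use defaults
    }
    else if(argc == 3)
    {
        do_verification = std::stoi(argv[1]);
        time_kernel     = static_cast<bool>(std::stoi(argv[2]));
    }
    else
    {
        printf("arg1: verification (0=no, 1=yes)\n");
        printf("arg2: time kernel (0=no, 1=yes)\n");
        exit(0);
    }

    float ave_time = run_kernels(time_kernel);
    if(time_kernel)
        printf("average kernel time: %f ms\n", ave_time);

    bool pass = true;
    if(do_verification)
    {
        // host-side reference check would go here
    }

    return pass ? 0 : 1;
}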
@@ -236,7 +236,7 @@ void DumpGemmLayerNormPerf(float gemm_reduce_time, float normalize_time, int M,
               << " GB/s, " << std::endl;
 }
 
-int main()
+int main(int argc, char* argv[])
 {
     // GEMM shape
     ck::index_t M = 1024;
@@ -249,6 +249,25 @@ int main()
     ck::index_t StrideD1 = 1024;
     ck::index_t StrideE  = 1024;
 
+    bool do_verification = true;
+    bool time_kernel     = false;
+
+    if(argc == 1)
+    {
+        // use default
+    }
+    else if(argc == 3)
+    {
+        do_verification = std::stoi(argv[1]);
+        time_kernel     = static_cast<bool>(std::stoi(argv[2]));
+    }
+    else
+    {
+        printf("arg1: verification (0=no, 1=yes)\n");
+        printf("arg2: time kernel (0=no, 1=yes)\n");
+        exit(0);
+    }
+
     Tensor<ADataType> a_m_k(f_host_tensor_descriptor2d(M, K, StrideA, ALayout{}));
     Tensor<BDataType> b_k_n(f_host_tensor_descriptor2d(K, N, StrideB, BLayout{}));
     Tensor<D0DataType> bias_n(f_host_tensor_descriptor1d(N, 1));
@@ -357,6 +376,7 @@ int main()
     normalize_invoker.Run(normalize_argument_ptr.get(), StreamConfig{nullptr, false});
 
     bool pass = true;
+    if(do_verification)
     {
         // verification
         Tensor<LayerNormOutDataType> host_layerNorm_m_n(
@@ -383,27 +403,25 @@ int main()
             1e-2);
     }
 
+    if(time_kernel)
+    {
         // evaluate kernel perf
-        bool time_kernel = true;
-
         float gemm_reduce_mean_reduce_square_mean_ave_time =
-            gemmReduce_invoker.Run(gemmReduce_argument, StreamConfig{nullptr, time_kernel});
+            gemmReduce_invoker.Run(gemmReduce_argument, StreamConfig{nullptr, true});
         float normalize_ave_time =
-            normalize_invoker.Run(normalize_argument_ptr.get(), StreamConfig{nullptr, time_kernel});
+            normalize_invoker.Run(normalize_argument_ptr.get(), StreamConfig{nullptr, true});
 
-        if(time_kernel)
-            DumpGemmLayerNormPerf<ADataType,
-                                  BDataType,
-                                  EDataType,
-                                  D0DataType,
-                                  D1DataType,
-                                  R0DataType,
-                                  R1DataType,
-                                  GammaDataType,
-                                  BetaDataType,
-                                  LayerNormOutDataType>(
-                gemm_reduce_mean_reduce_square_mean_ave_time, normalize_ave_time, M, N, K);
+        DumpGemmLayerNormPerf<ADataType,
+                              BDataType,
+                              EDataType,
+                              D0DataType,
+                              D1DataType,
+                              R0DataType,
+                              R1DataType,
+                              GammaDataType,
+                              BetaDataType,
+                              LayerNormOutDataType>(
+            gemm_reduce_mean_reduce_square_mean_ave_time, normalize_ave_time, M, N, K);
+    }
 
     return pass ? 0 : 1;

@@ -221,7 +221,7 @@ void DumpGemmLayerNormPerf(float gemm_reduce_time, float normalize_time, int M,
               << " GB/s, " << std::endl;
 }
 
-int main()
+int main(int argc, char* argv[])
 {
     // GEMM shape
     ck::index_t M = 1024;
@@ -232,6 +232,25 @@ int main()
     ck::index_t StrideB = 1024;
     ck::index_t StrideE = 1024;
 
+    bool do_verification = true;
+    bool time_kernel     = false;
+
+    if(argc == 1)
+    {
+        // use default
+    }
+    else if(argc == 3)
+    {
+        do_verification = std::stoi(argv[1]);
+        time_kernel     = static_cast<bool>(std::stoi(argv[2]));
+    }
+    else
+    {
+        printf("arg1: verification (0=no, 1=yes)\n");
+        printf("arg2: time kernel (0=no, 1=yes)\n");
+        exit(0);
+    }
+
     Tensor<ADataType> a_m_k(f_host_tensor_descriptor2d(M, K, StrideA, ALayout{}));
     Tensor<BDataType> b_k_n(f_host_tensor_descriptor2d(K, N, StrideB, BLayout{}));
     Tensor<EDataType> e_m_n(f_host_tensor_descriptor2d(M, N, StrideE, ELayout{}));
@@ -333,6 +352,7 @@ int main()
     normalize_invoker.Run(normalize_argument_ptr.get(), StreamConfig{nullptr, false});
 
     bool pass = true;
+    if(do_verification)
     {
         // verification
         Tensor<LayerNormOutDataType> host_layerNorm_m_n(
@@ -354,25 +374,23 @@ int main()
             layerNorm_m_n, host_layerNorm_m_n, "Error: Incorrect results d1", 1e-3, 1e-3);
     }
 
+    if(time_kernel)
+    {
         // evaluate kernel perf
-        bool time_kernel = true;
-
         float gemm_reduce_mean_reduce_square_mean_ave_time =
-            gemmReduce_invoker.Run(gemmReduce_argument, StreamConfig{nullptr, time_kernel});
+            gemmReduce_invoker.Run(gemmReduce_argument, StreamConfig{nullptr, true});
         float normalize_ave_time =
-            normalize_invoker.Run(normalize_argument_ptr.get(), StreamConfig{nullptr, time_kernel});
+            normalize_invoker.Run(normalize_argument_ptr.get(), StreamConfig{nullptr, true});
 
-        if(time_kernel)
-            DumpGemmLayerNormPerf<ADataType,
-                                  BDataType,
-                                  EDataType,
-                                  R0DataType,
-                                  R1DataType,
-                                  GammaDataType,
-                                  BetaDataType,
-                                  LayerNormOutDataType>(
-                gemm_reduce_mean_reduce_square_mean_ave_time, normalize_ave_time, M, N, K);
+        DumpGemmLayerNormPerf<ADataType,
+                              BDataType,
+                              EDataType,
+                              R0DataType,
+                              R1DataType,
+                              GammaDataType,
+                              BetaDataType,
+                              LayerNormOutDataType>(
+            gemm_reduce_mean_reduce_square_mean_ave_time, normalize_ave_time, M, N, K);
+    }
 
     return pass ? 0 : 1;
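With these changes, running an example with no arguments preserves the old defaults (verification on, timing off), while passing both flags, for example "./example_gemm_layernorm 1 1" (binary name illustrative), enables the host-side reference check and prints the measured kernel times through DumpGemmLayerNormPerf.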