mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
batched_gemm + multiple_d + gemm + multiple_d (#394)
* refactor * start * add device gemm file * add BatchStrideD0 * add StrideD0 * add gridwise file * add d0 parameters to gridwise gemm * add c layout transformer * add d0 threadwise copy * init kernel * init kernel * regular code * nm desc put to out * kernel parameter cannot use reference * host add bias+gelu * run right for bias+gelu * change AddFastGelu into another file * interface add d1 bias parameters * add d1 parameter to argument * add d1 parameter to gridwise * first all code, not verified * gelu change to relu and GetElementSpaceSize bug * add instance * start add to ckprofiler * ckprofiler finish code * change input parameter for ckProfiler * fix host bias+gelu bug * show help for ckProfiler * fix bug for launch kernel ignoring parameters * add pad and fix about bug * multiple d0 * change profiler and instance to multiple d0 * example have 2 d0 * remove some comments not using * change 2 d0 have self parameters * change d element_op name * change class name(multiple_d) * fix bug * fix bug that don't find file * update profiler * refactor * update profiler * clean * revert example change * add gon layout * optimize parameter for gno * add gon to gemm+gemm * change helping input parameters * change to GemmPadder_v2 * using ForEach * fix gb_per_sec Co-authored-by: Chao Liu <lc.roy86@gmail.com> Co-authored-by: ltqin <letaoqin@amd.com>
This commit is contained in:
@@ -10,6 +10,8 @@ int profile_gemm_add_add_fastgelu(int, char*[]);
|
||||
int profile_gemm_reduce(int, char*[]);
|
||||
int profile_gemm_bias_add_reduce(int, char*[]);
|
||||
int profile_batched_gemm(int, char*[]);
|
||||
int profile_batched_gemm_gemm(int, char*[]);
|
||||
int profile_batched_gemm_add_relu_gemm_add(int, char*[]);
|
||||
int profile_batched_gemm_reduce(int, char*[]);
|
||||
int profile_grouped_gemm(int, char*[]);
|
||||
int profile_conv_fwd(int, char*[]);
|
||||
@@ -32,6 +34,8 @@ static void print_helper_message()
|
||||
" gemm_reduce: GEMM+Reduce\n"
|
||||
" gemm_bias_add_reduce: GEMM+Bias+Add+Reduce\n"
|
||||
" batched_gemm: Batched GEMM\n"
|
||||
" batched_gemm_gemm: Batched+GEMM+GEMM\n"
|
||||
" batched_gemm_add_relu_gemm_add: Batched+GEMM+bias+gelu+GEMM+bias\n"
|
||||
" batched_gemm_reduce: Batched GEMM+Reduce\n"
|
||||
" grouped_gemm: Grouped GEMM\n"
|
||||
" conv_fwd: Convolution Forward\n"
|
||||
@@ -80,6 +84,14 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
return profile_batched_gemm(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "batched_gemm_gemm") == 0)
|
||||
{
|
||||
return profile_batched_gemm_gemm(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "batched_gemm_add_relu_gemm_add") == 0)
|
||||
{
|
||||
return profile_batched_gemm_add_relu_gemm_add(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "batched_gemm_reduce") == 0)
|
||||
{
|
||||
return profile_batched_gemm_reduce(argc, argv);
|
||||
|
||||
Reference in New Issue
Block a user