mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
Batched gemm and reduction (#156)
* adding batched_gemm_and_reduction
* batched_gemm_reduce works with batch_count=1
* fix a bug in grid_size; batched_gemm_reduce works for batch_count > 1
* adding profiler for batched_gemm_fp16
* fixed a bug in declaration of d1 and d0; both example and profiler work
* clang-format
* cleanup
* batched_gemm_reduce: add test
* minor change
* fixed some typos in function names
This commit is contained in:
@@ -17,6 +17,7 @@ int profile_conv_fwd_bias_relu_add(int, char*[]);
|
||||
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
|
||||
int profile_convnd_bwd_data(int, char*[], int);
|
||||
int profile_reduce(int, char*[]);
|
||||
int profile_batched_gemm_reduce(int, char*[]);
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
@@ -44,6 +45,10 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
return profile_batched_gemm(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "batched_gemm_reduce") == 0)
|
||||
{
|
||||
return profile_batched_gemm_reduce(argc, argv);
|
||||
}
|
||||
else if(strcmp(argv[1], "grouped_gemm") == 0)
|
||||
{
|
||||
profile_grouped_gemm(argc, argv);
|
||||
|
||||
Reference in New Issue
Block a user