Batched gemm and reduction (#156)

* adding batched_gemm_and_reduction

* batched_gemm_reduce works with batch_count=1

* fix a bug in grid_size; batched_gemm_reduce works for batch_count > 1

* adding profiler for batched_gemm_fp16

* fixed a bug in declaration of d1 and d0; both example and profiler work

* clang-format

* cleanup

* batched_gemm_reduce: add test

* minor change

* fixed some typos in function names
This commit is contained in:
Jianfeng Yan
2022-03-30 11:21:18 -05:00
committed by GitHub
parent 98e1e2d0e9
commit 34c661e71c
27 changed files with 2145 additions and 62 deletions

View File

@@ -17,6 +17,7 @@ int profile_conv_fwd_bias_relu_add(int, char*[]);
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
int profile_convnd_bwd_data(int, char*[], int);
int profile_reduce(int, char*[]);
int profile_batched_gemm_reduce(int, char*[]);
int main(int argc, char* argv[])
{
@@ -44,6 +45,10 @@ int main(int argc, char* argv[])
{
return profile_batched_gemm(argc, argv);
}
else if(strcmp(argv[1], "batched_gemm_reduce") == 0)
{
return profile_batched_gemm_reduce(argc, argv);
}
else if(strcmp(argv[1], "grouped_gemm") == 0)
{
profile_grouped_gemm(argc, argv);