Fix the slow CPU reference batched GEMM kernels. (#388)

* Fix the performance of the batched GEMM verification

* fix tabs
This commit is contained in:
Illia Silin
2022-08-29 06:39:21 -07:00
committed by GitHub
parent 1e5b59df22
commit 9061d39bd6

View File

@@ -83,8 +83,8 @@ struct ReferenceBatchedGemm : public device::BaseOperator
make_ParallelTensorFunctor(f_gmk_gkn_gmn,
arg.c_g_m_n_.mDesc.GetLengths()[0],
arg.c_g_m_n_.mDesc.GetLengths()[1],
-arg.c_g_m_n_.mDesc.GetLengths()[2])();
+arg.c_g_m_n_.mDesc.GetLengths()[2])(
+std::thread::hardware_concurrency());
return 0;
}