mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 08:50:17 +00:00
Fix the slow cpu reference batched gemm kernels. (#388)
* fix the performance of the batched gemm verification
* fix tabs
This commit is contained in:
@@ -83,8 +83,8 @@ struct ReferenceBatchedGemm : public device::BaseOperator
             make_ParallelTensorFunctor(f_gmk_gkn_gmn,
                                        arg.c_g_m_n_.mDesc.GetLengths()[0],
                                        arg.c_g_m_n_.mDesc.GetLengths()[1],
-                                       arg.c_g_m_n_.mDesc.GetLengths()[2])();
-
+                                       arg.c_g_m_n_.mDesc.GetLengths()[2])(
+                std::thread::hardware_concurrency());
             return 0;
         }
 
Reference in New Issue
Block a user