diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp index 269126432b..46a1fa559a 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp @@ -83,8 +83,8 @@ struct ReferenceBatchedGemm : public device::BaseOperator make_ParallelTensorFunctor(f_gmk_gkn_gmn, arg.c_g_m_n_.mDesc.GetLengths()[0], arg.c_g_m_n_.mDesc.GetLengths()[1], - arg.c_g_m_n_.mDesc.GetLengths()[2])(); - + arg.c_g_m_n_.mDesc.GetLengths()[2])( + std::thread::hardware_concurrency()); return 0; }