From 9061d39bd6f44efc6b110466e5859b2d41a4640e Mon Sep 17 00:00:00 2001 From: Illia Silin <98187287+illsilin@users.noreply.github.com> Date: Mon, 29 Aug 2022 06:39:21 -0700 Subject: [PATCH] Fix the slow CPU reference batched GEMM kernels. (#388) * fix the performance of the batched GEMM verification * fix tabs --- .../reference_tensor_operation/cpu/reference_batched_gemm.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp index 269126432b..46a1fa559a 100644 --- a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp +++ b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp @@ -83,8 +83,8 @@ struct ReferenceBatchedGemm : public device::BaseOperator make_ParallelTensorFunctor(f_gmk_gkn_gmn, arg.c_g_m_n_.mDesc.GetLengths()[0], arg.c_g_m_n_.mDesc.GetLengths()[1], - arg.c_g_m_n_.mDesc.GetLengths()[2])(); - + arg.c_g_m_n_.mDesc.GetLengths()[2])( + std::thread::hardware_concurrency()); return 0; }