diff --git a/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn.hpp b/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn.hpp index cca1fa29c9..7a107ef0e1 100644 --- a/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn.hpp +++ b/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn.hpp @@ -423,7 +423,7 @@ void device_implicit_gemm_convolution_1_chwn_cyxk_khwn(InDesc, printf("Elapsed time : %f ms, %f TFlop/s\n", time, (float)calculate_convolution_flops(InDesc{}, WeiDesc{}, OutDesc{}) / - (std::size_t(1024) * 1024 * 1024 * 1024) / (time / 1000)); + (std::size_t(1000) * 1000 * 1000) / time); usleep(std::min(time * 1000, float(10000))); } diff --git a/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp b/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp index fb673bd6a6..cb72639691 100644 --- a/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp +++ b/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp @@ -314,7 +314,7 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc, printf("Elapsed time : %f ms, %f TFlop/s\n", time, (float)calculate_convolution_flops(InDesc{}, WeiDesc{}, OutDesc{}) / - (std::size_t(1024) * 1024 * 1024 * 1024) / (time / 1000)); + (std::size_t(1000) * 1000 * 1000) / time); usleep(std::min(time * 1000, float(10000))); } diff --git a/src/include/blockwise_batched_gemm.hip.hpp b/src/include/blockwise_batched_gemm.hip.hpp index 8b8e4d9566..87f17532d4 100644 --- a/src/include/blockwise_batched_gemm.hip.hpp +++ b/src/include/blockwise_batched_gemm.hip.hpp @@ -259,7 +259,6 @@ struct BlockwiseBatchGemmBlockABlockBThreadCTransANormalBNormalC_V2 c_thread_mtx, False, p_c_thread + ib * ThreadMatrixStrideC); - } } } diff --git a/src/include/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hip.hpp b/src/include/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hip.hpp index 023cfc68e5..90370b7b4f 100644 --- a/src/include/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hip.hpp +++ b/src/include/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hip.hpp @@ -269,9 +269,9 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer #elif 0 blockwise_gemm.Run_asm #endif - (p_wei_block_now + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), - p_in_block_now + y * Wi + x, - p_out_thread); + (p_wei_block_now + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), + p_in_block_now + y * Wi + x, + p_out_thread); } } @@ -310,9 +310,9 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer #elif 0 blockwise_gemm.Run_asm #endif - (p_wei_block_double + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), - p_in_block_double + y * Wi + x, - p_out_thread); + (p_wei_block_double + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), + p_in_block_double + y * Wi + x, + p_out_thread); } } @@ -335,10 +335,10 @@ struct GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer #elif 0 blockwise_gemm.Run_asm #endif - (p_wei_block_double + wei_block_space + - wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), - p_in_block_double + in_block_space + y * Wi + x, - p_out_thread); + (p_wei_block_double + wei_block_space + + wei_cyxk_block_desc.Get1dIndex(0, y, x, 0), + p_in_block_double + in_block_space + y * Wi + x, + p_out_thread); } } }