diff --git a/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp b/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp index a773a078e4..497aa3e9c1 100644 --- a/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp +++ b/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp @@ -190,8 +190,8 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc, constexpr index_t WeiBlockCopyDataPerRead = 4; constexpr index_t BlockSize = 256; -#elif 0 - // 1x1, 14x14, Vega 20 +#elif 1 + // 1x1, 14x14, Vega 20, disable lds_double_buffer, enable register double buffer constexpr index_t BPerBlock = 64; constexpr index_t KPerBlock = 128; constexpr index_t CPerBlock = 8; @@ -220,7 +220,7 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc, constexpr index_t WeiBlockCopyDataPerRead = 4; constexpr index_t BlockSize = 128; -#elif 1 +#elif 0 // 1x1, 14x14, Vega 20, hack CPerBlock = 1 constexpr index_t BPerBlock = 64; constexpr index_t KPerBlock = 128;