diff --git a/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp b/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp index bf7cdc8c5a..bab0b5d7fe 100644 --- a/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp +++ b/driver/device_implicit_gemm_convolution_2_chwn_cyxk_khwn.hpp @@ -190,7 +190,7 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc, constexpr index_t WeiBlockCopyDataPerRead = 4; constexpr index_t BlockSize = 256; -#elif 1 +#elif 0 // 1x1, 14x14, Vega 20, disable lds_double_buffer, enable register double buffer constexpr index_t BPerBlock = 64; constexpr index_t KPerBlock = 128; diff --git a/src/include/gridwise_implicit_gemm_convolution_2_chwn_cyxk_khwn.hip.hpp b/src/include/gridwise_implicit_gemm_convolution_2_chwn_cyxk_khwn.hip.hpp index fd223dfad4..657c233e5e 100644 --- a/src/include/gridwise_implicit_gemm_convolution_2_chwn_cyxk_khwn.hip.hpp +++ b/src/include/gridwise_implicit_gemm_convolution_2_chwn_cyxk_khwn.hip.hpp @@ -315,7 +315,7 @@ class gridwise_implicit_gemm_convolution_2_chwn_cyxk_khwn auto f_accum = [](auto& acc, const auto&& v) { acc += v; }; #if 0 blockwise_gemm.Run -#elif 1 +#elif 0 blockwise_gemm.Run_RegisterDoubleBuffer #elif 1 blockwise_gemm.Run_asm