From 850b9adbf977ba795e239e10d9aa04783e2e49aa Mon Sep 17 00:00:00 2001 From: joye Date: Tue, 3 Jun 2025 12:56:38 +0800 Subject: [PATCH] Tune BatchPerBlock (64 -> 8) for the current best-performing kernel --- ...evice_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp index 024ea6645b..27e5c4af3f 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp @@ -1058,7 +1058,7 @@ struct DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1 auto launch_kernel = [&]() { // constexpr bool has_main_loop = has_main_k_block_loop.value; constexpr index_t GroupPerBlock = 64; - constexpr index_t BatchPerBlock = 64; + constexpr index_t BatchPerBlock = 8; const auto kernel = kernel_grouped_conv_bwd_data_optimized