From f8bc223a581b92c1db755b660ece207e4e0a31e6 Mon Sep 17 00:00:00 2001 From: joye Date: Tue, 3 Jun 2025 09:27:57 +0800 Subject: [PATCH] update kernel to pass --- ...evice_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp index 56bdda0090..b1aa3803a9 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp @@ -180,8 +180,8 @@ __global__ void #if CK_USE_LAUNCH_BOUNDS __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) #endif - kernel_grouped_conv_bwd_data_optimized(const ABDataType* __restrict__ p_weight, - const ABDataType* __restrict__ p_gradOut, + kernel_grouped_conv_bwd_data_optimized(const ABDataType* __restrict__ p_gradOut, + const ABDataType* __restrict__ p_weight, EDataType* __restrict__ p_gradIn) { int grp_idx = 64 * blockIdx.x;