From f8bc223a581b92c1db755b660ece207e4e0a31e6 Mon Sep 17 00:00:00 2001
From: joye <joye@amd.com>
Date: Tue, 3 Jun 2025 09:27:57 +0800
Subject: [PATCH] Swap p_weight/p_gradOut kernel argument order to make kernel pass

---
 ...evice_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp
index 56bdda0090..b1aa3803a9 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp
@@ -180,8 +180,8 @@ __global__ void
 #if CK_USE_LAUNCH_BOUNDS
     __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU)
 #endif
-        kernel_grouped_conv_bwd_data_optimized(const ABDataType* __restrict__ p_weight,
-                                               const ABDataType* __restrict__ p_gradOut,
+        kernel_grouped_conv_bwd_data_optimized(const ABDataType* __restrict__ p_gradOut,
+                                               const ABDataType* __restrict__ p_weight,
                                                EDataType* __restrict__ p_gradIn)
 {
     int grp_idx                  = 64 * blockIdx.x;