From 4c85e10281cbd7d3a7138fdb91123e2fa032bbc6 Mon Sep 17 00:00:00 2001 From: joye Date: Wed, 11 Jun 2025 12:38:22 +0800 Subject: [PATCH] Use GroupPerBlockInFP4 stride when indexing shmem_k in kernel_grouped_conv_bwd_data_optimized_v2 --- ...evice_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp index aeffadddd0..db4852c9e0 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp @@ -586,8 +586,8 @@ __global__ void kernel_grouped_conv_bwd_data_optimized_v2(const ABDataType* __re (rel_in_y + y) * TileInW * GroupPerBlockInFP4 + (rel_in_x + x) * GroupPerBlockInFP4 + group_out_id]; ABDTypeVec_t shmem_k_vec = reinterpret_cast( - shmem_k)[(kernel_y + y * up_h) * kernelW * GroupPerBlock + - (kernel_x + x * up_w) * GroupPerBlock + group_out_id]; + shmem_k)[(kernel_y + y * up_h) * kernelW * GroupPerBlockInFP4 + + (kernel_x + x * up_w) * GroupPerBlockInFP4 + group_out_id]; static_for<0, ElementPerInFP4, 1>{}([&](auto idx) { auto x_val = shmem_x_vec.template AsType()[idx];