From e61f956fc2def843366fe6320809eeffd15ba002 Mon Sep 17 00:00:00 2001 From: "assistant-librarian[bot]" Date: Fri, 4 Jul 2025 14:07:12 +0000 Subject: [PATCH] Merge commit '3d70c638d1a217869fe0d90636232d239786b4e5' into develop --- ...uped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp index efb91bd13d..89a304fda4 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp @@ -611,7 +611,19 @@ struct DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1 // If stride is larger than windows size then we will have some empty places image_covered_strides &= conv_filter_strides[d] <= b_g_k_c_xs_lengths[d + I3]; } + bool if_d_is_output_mem = false; + const void* out_mem_void = static_cast<const void*>(p_e); + static_for<0, NumDTensor, 1>{}([&](auto i) { + if(p_ds[i] == out_mem_void) + { + if_d_is_output_mem = true; + } + }); + bwd_needs_zero_out = k_batch_ > 1 || !image_covered_dilation || !image_covered_strides; + + // Temporary workaround until the above conditions are proven/fixed. + bwd_needs_zero_out = !if_d_is_output_mem; e_space_size_bytes = ck::accumulate_n( e_g_n_c_wis_lengths_.begin(), NDimSpatial + I3, 1, std::multiplies<>()) *