Merge commit '3d70c638d1a217869fe0d90636232d239786b4e5' into develop

This commit is contained in:
assistant-librarian[bot]
2025-07-04 14:07:12 +00:00
parent ec10093322
commit e61f956fc2

View File

@@ -611,7 +611,19 @@ struct DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
// If the stride is larger than the window size, some positions are left empty
image_covered_strides &= conv_filter_strides[d] <= b_g_k_c_xs_lengths[d + I3];
}
bool if_d_is_output_mem = false;
const void* out_mem_void = static_cast<const void*>(p_e);
static_for<0, NumDTensor, 1>{}([&](auto i) {
if(p_ds[i] == out_mem_void)
{
if_d_is_output_mem = true;
}
});
bwd_needs_zero_out = k_batch_ > 1 || !image_covered_dilation || !image_covered_strides;
// Temporary workaround until the above conditions are proven or fixed.
bwd_needs_zero_out = !if_d_is_output_mem;
e_space_size_bytes =
ck::accumulate_n<long_index_t>(
e_g_n_c_wis_lengths_.begin(), NDimSpatial + I3, 1, std::multiplies<>()) *