Always force output clearing for grouped conv bwd data (#2446)

* Always force output clearing

* Don't run set zero for residual

---------

Co-authored-by: Bartlomiej Kocot <barkocot@amd.com>
This commit is contained in:
Adam Osewski
2025-07-04 15:49:52 +02:00
committed by GitHub
parent 394e5be10d
commit 3d70c638d1

View File

@@ -611,7 +611,19 @@ struct DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
// If stride is larger than windows size then we will have some empty places
image_covered_strides &= conv_filter_strides[d] <= b_g_k_c_xs_lengths[d + I3];
}
bool if_d_is_output_mem = false;
const void* out_mem_void = static_cast<const void*>(p_e);
static_for<0, NumDTensor, 1>{}([&](auto i) {
if(p_ds[i] == out_mem_void)
{
if_d_is_output_mem = true;
}
});
bwd_needs_zero_out = k_batch_ > 1 || !image_covered_dilation || !image_covered_strides;
// Temporary workaround until the above conditions are proven or fixed.
bwd_needs_zero_out = !if_d_is_output_mem;
e_space_size_bytes =
ck::accumulate_n<long_index_t>(
e_g_n_c_wis_lengths_.begin(), NDimSpatial + I3, 1, std::multiplies<>()) *