mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 12:41:26 +00:00
Always force output clearing for grouped conv bwd data (#2446)
* Always force output clearing * dont run set zero for residual --------- Co-authored-by: Bartlomiej Kocot <barkocot@amd.com>
This commit is contained in:
@@ -611,7 +611,19 @@ struct DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
|
||||
// If the stride is larger than the window size, then we will have some empty places
|
||||
image_covered_strides &= conv_filter_strides[d] <= b_g_k_c_xs_lengths[d + I3];
|
||||
}
|
||||
bool if_d_is_output_mem = false;
|
||||
const void* out_mem_void = static_cast<const void*>(p_e);
|
||||
static_for<0, NumDTensor, 1>{}([&](auto i) {
|
||||
if(p_ds[i] == out_mem_void)
|
||||
{
|
||||
if_d_is_output_mem = true;
|
||||
}
|
||||
});
|
||||
|
||||
bwd_needs_zero_out = k_batch_ > 1 || !image_covered_dilation || !image_covered_strides;
|
||||
|
||||
// Temporary workaround until the above conditions are proven or fixed.
|
||||
bwd_needs_zero_out = !if_d_is_output_mem;
|
||||
e_space_size_bytes =
|
||||
ck::accumulate_n<long_index_t>(
|
||||
e_g_n_c_wis_lengths_.begin(), NDimSpatial + I3, 1, std::multiplies<>()) *
|
||||
|
||||
Reference in New Issue
Block a user