[CK TILE] Clear output buffers for grouped conv bwd (#3127)

This commit is contained in:
Bartłomiej Kocot
2025-10-31 14:11:54 +01:00
committed by GitHub
parent e135dd518d
commit c2d7931446
2 changed files with 24 additions and 4 deletions

View File

@@ -170,8 +170,15 @@ struct GroupedConvolutionBackwardDataInvoker
<< ", Vector size C: " << ConvEpilogue::GetVectorSizeC() << std::endl;
}
-ave_time = ck_tile::launch_kernel(
-s, ck_tile::make_kernel<kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
+auto preprocess = [&]() {
+ck_tile::hip_check_error(hipMemsetAsync(
+kargs.in_ptr, 0, args.template GetInputByte<InDataType>(), s.stream_id_));
+};
+ave_time = ck_tile::launch_kernel_time_mask(
+s,
+preprocess,
+ck_tile::make_kernel<kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
return ave_time;
};

View File

@@ -171,8 +171,21 @@ struct GroupedConvolutionBackwardWeightInvoker
<< ", Vector size C: " << ConvEpilogue::GetVectorSizeC() << std::endl;
}
-ave_time = ck_tile::launch_kernel(
-s, ck_tile::make_kernel<kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
+auto preprocess = [&]() {
+if(args.k_batch > 1)
+{
+ck_tile::hip_check_error(
+hipMemsetAsync(kargs.wei_ptr,
+0,
+args.template GetWeightByte<WeiDataType>(),
+s.stream_id_));
+}
+};
+ave_time = ck_tile::launch_kernel_time_mask(
+s,
+preprocess,
+ck_tile::make_kernel<kBlockPerCu>(Kernel{}, grids, blocks, 0, kargs));
return ave_time;
};