mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 12:59:49 +00:00
Replace grouped conv bwd wei wmmaV3 bilin/scale bf16f32bf16 support with bf16bf16bf16 (#3470)
* Replace grouped convolution bwd weight wmma v3 bilinear and scale bf16f32bf16 support with bf16bf16bf16 support. Update tests.
* Tentative fix for bwd weight bilinear bf16bf16bf16: the bilinear elementwise overload for this case (bf16, f32 accumulator, bf16) appears to have been wrong.
[ROCm/composable_kernel commit: 88ae445580]
This commit is contained in:
committed by
GitHub
parent
13134864cc
commit
ac28f1b016
@@ -746,7 +746,6 @@ struct DeviceGroupedConvBwdWeightMultipleD_Wmma_CShuffleV3
|
||||
arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I2);
|
||||
|
||||
AccDataType* p_e_grid = type_convert<AccDataType*>(arg.p_workspace_);
|
||||
;
|
||||
|
||||
// Convolution kernel dispatch
|
||||
typename GridwiseGemm::Argument gemm_arg{
|
||||
|
||||
@@ -348,9 +348,7 @@ struct Bilinear
|
||||
__host__ __device__ constexpr void
|
||||
operator()<bhalf_t, float, bhalf_t>(bhalf_t& y, const float& x0, const bhalf_t& x1) const
|
||||
{
|
||||
const float x1_tmp = ck::type_convert<float>(x1);
|
||||
const float y_tmp = alpha_ * x0 + beta_ * x1_tmp;
|
||||
y = y_tmp;
|
||||
y = type_convert<bhalf_t>(alpha_ * x0 + beta_ * ck::type_convert<float>(x1));
|
||||
};
|
||||
|
||||
template <>
|
||||
|
||||
Reference in New Issue
Block a user