mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-27 08:25:46 +00:00
fixed multiple-definition issue with the bfp16/fp32 conversion functions when building ckProfiler (#51)
* fixed bfloat16 issues
* refactor type_convert
Co-authored-by: Chao Liu <chao.liu2@amd.com>
[ROCm/composable_kernel commit: 0a66c54e95]
This commit is contained in:
@@ -82,8 +82,8 @@ void host_convolution_forward(const Tensor<TIn>& in,
|
||||
{
|
||||
if constexpr(is_same<TIn, ushort>::value)
|
||||
{
|
||||
v += ck::bf16_to_f32(in(n, c, hi, wi)) *
|
||||
ck::bf16_to_f32(wei(k, c, y, x));
|
||||
v += ck::type_convert<float>(in(n, c, hi, wi)) *
|
||||
ck::type_convert<float>(wei(k, c, y, x));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -97,7 +97,7 @@ void host_convolution_forward(const Tensor<TIn>& in,
|
||||
|
||||
if constexpr(is_same<TOut, ushort>::value)
|
||||
{
|
||||
out(n, k, ho, wo) = f32_to_bf16(v);
|
||||
out(n, k, ho, wo) = type_convert<ushort>(v);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -120,8 +120,8 @@ void host_convolution_forward(const Tensor<TIn>& in,
|
||||
{
|
||||
if constexpr(is_same<TIn, ushort>::value)
|
||||
{
|
||||
v += ck::bf16_to_f32(in(n, hi, wi, c)) *
|
||||
ck::bf16_to_f32(wei(k, y, x, c));
|
||||
v += ck::type_convert<float>(in(n, hi, wi, c)) *
|
||||
ck::type_convert<float>(wei(k, y, x, c));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -134,7 +134,7 @@ void host_convolution_forward(const Tensor<TIn>& in,
|
||||
}
|
||||
if constexpr(is_same<TOut, ushort>::value)
|
||||
{
|
||||
out(n, ho, wo, k) = f32_to_bf16(v);
|
||||
out(n, ho, wo, k) = ck::type_convert<ushort>(v);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user