mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 09:16:52 +00:00
updated bfloat16_to_float
This commit is contained in:
@@ -5,7 +5,6 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_fp16.h"
|
||||
#endif
|
||||
#include "bfloat16_dev.hpp"
|
||||
|
||||
// "Constant" address space for kernel parameter
|
||||
#define CONSTANT __attribute__((address_space(4)))
|
||||
|
||||
@@ -927,6 +927,58 @@ using int8x16_t = typename vector_type<int8_t, 16>::type;
|
||||
using int8x32_t = typename vector_type<int8_t, 32>::type;
|
||||
using int8x64_t = typename vector_type<int8_t, 64>::type;
|
||||
|
||||
// Convert a bfloat16 bit pattern (held in a ushort) to the float it represents.
//
// bfloat16 shares float32's sign and exponent layout, so shifting the 16 raw
// bits into the upper half of a 32-bit word yields the exact float value
// (the low 16 mantissa bits become zero).
//
// The bit reinterpretation uses __builtin_memcpy instead of a union: reading
// an inactive union member is undefined behavior in ISO C++, while the memcpy
// form is well-defined and compiles to the same single register move on both
// host and device.
__host__ __device__ float bf16_to_f32(ushort src_val)
{
    uint32_t bits = uint32_t(src_val) << 16;
    float result;
    __builtin_memcpy(&result, &bits, sizeof(result));
    return result;
}
|
||||
|
||||
// Convert a 32-bit float to bfloat16 with round-to-nearest-even, preserving
// NaN (including signaling NaN). Rounding and NaN handling match rocBLAS:
// https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h
__host__ __device__ ushort f32_to_bf16(float src_val)
{
    // Bit-reinterpret the float via __builtin_memcpy instead of a union:
    // reading an inactive union member is undefined behavior in ISO C++,
    // while the memcpy form is well-defined and compiles to the same code.
    uint32_t u;
    __builtin_memcpy(&u, &src_val, sizeof(u));

    if(~u & 0x7f800000)
    {
        // When the exponent bits are not all 1s, then the value is zero, normal,
        // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
        // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
        // This causes the bfloat16's mantissa to be incremented by 1 if the 16
        // least significant bits of the float mantissa are greater than 0x8000,
        // or if they are equal to 0x8000 and the least significant bit of the
        // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
        // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
        // has the value 0x7f, then incrementing it causes it to become 0x00 and
        // the exponent is incremented by one, which is the next higher FP value
        // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
        // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
        // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
        // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
        // incrementing it causes it to become an exponent of 0xFF and a mantissa
        // of 0x00, which is Inf, the next higher value to the unrounded value.
        u += 0x7fff + ((u >> 16) & 1); // Round to nearest, round to even
    }
    else if(u & 0xffff)
    {
        // When all of the exponent bits are 1, the value is Inf or NaN.
        // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
        // mantissa bit. Quiet NaN is indicated by the most significant mantissa
        // bit being 1. Signaling NaN is indicated by the most significant
        // mantissa bit being 0 but some other bit(s) being 1. If any of the
        // lower 16 bits of the mantissa are 1, we set the least significant bit
        // of the bfloat16 mantissa, in order to preserve signaling NaN in case
        // the bfloat16's mantissa bits are all 0.
        u |= 0x10000; // Preserve signaling NaN
    }
    return uint16_t(u >> 16);
}
|
||||
|
||||
// data type conversion
|
||||
template <typename T>
|
||||
struct type_convert
|
||||
@@ -942,14 +994,14 @@ template <>
|
||||
template <>
|
||||
__device__ float type_convert<float>::operator()<ushort>(ushort x) const
|
||||
{
|
||||
return bfloat16_to_float(x);
|
||||
return bf16_to_f32(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
template <>
|
||||
__device__ ushort type_convert<ushort>::operator()<float>(float x) const
|
||||
{
|
||||
return float_to_bfloat16(x);
|
||||
return f32_to_bf16(x);
|
||||
}
|
||||
|
||||
// TODO: deprecate this
|
||||
|
||||
@@ -28,6 +28,12 @@ __device__ void inner_product<float, float, float>(const float& a, const float&
|
||||
#endif
|
||||
}
|
||||
|
||||
template <>
// Multiply-accumulate for bfloat16 operands: widen both inputs to float,
// then accumulate their product into c in full float precision.
__device__ void inner_product<ushort, ushort, float>(const ushort& a, const ushort& b, float& c)
{
    const float a_f32 = bf16_to_f32(a);
    const float b_f32 = bf16_to_f32(b);
    c += a_f32 * b_f32;
}
|
||||
|
||||
template <>
|
||||
__device__ void
|
||||
inner_product<float2_t, float2_t, float>(const float2_t& a, const float2_t& b, float& c)
|
||||
|
||||
125
external/rocm/include/bfloat16_dev.hpp
vendored
125
external/rocm/include/bfloat16_dev.hpp
vendored
@@ -1,125 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef BFLOAT16_DEVICE_HPP
|
||||
#define BFLOAT16_DEVICE_HPP
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#define EXECUTION_SPECIFIER __device__ __host__
|
||||
#else
|
||||
#define EXECUTION_SPECIFIER
|
||||
#endif // MIOPEN_BACKEND_HIP
|
||||
|
||||
// Scratch union for reinterpreting bits between a 32-bit word, a pair of
// 16-bit halves, and a float; used by the bfloat16 <-> float converters below.
typedef union
{
    uint u32;
    ushort2 ushortx2;

    // Composable kernels are written in HIP language. The language doesnt support
    // ushort2.hi or ushort2.low, so an indexable array alias is provided instead.
#ifdef __HIP_PLATFORM_HCC__
    ushort ushortvec[2];
#endif // MIOPEN_BACKEND_HIP
    float f32;
} cvt_bf16_fp32_t;
|
||||
|
||||
// Widen a bfloat16 bit pattern to the float it represents: the 16 bits are
// placed in the upper half of a 32-bit word (zero low mantissa bits), which
// is exactly the corresponding float32 encoding.
EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
{
    cvt_bf16_fp32_t v;

#ifdef __HIP_PLATFORM_HCC__
    v.ushortx2 = make_ushort2(0, src_val);
#else
    v.ushortx2 = (ushort2)(0, src_val);
#endif

    return v.f32;
}
|
||||
|
||||
// Convert a 32-bit float to a bfloat16 bit pattern (returned as ushort).
// With MIOPEN_USE_RNE_BFLOAT16 defined, finite values are rounded to nearest
// even; otherwise finite values are truncated. NaN is always preserved.
EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
{
    cvt_bf16_fp32_t target_val;
    target_val.f32 = src_val;
    // BF16 round and NaN preservation code matches
    // https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h
    if((~target_val.u32 & 0x7f800000) == 0) // Inf or NaN
    {
        // When all of the exponent bits are 1, the value is Inf or NaN.
        // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
        // mantissa bit. Quiet NaN is indicated by the most significant mantissa
        // bit being 1. Signaling NaN is indicated by the most significant
        // mantissa bit being 0 but some other bit(s) being 1. If any of the
        // lower 16 bits of the mantissa are 1, we set the least significant bit
        // of the bfloat16 mantissa, in order to preserve signaling NaN in case
        // the bfloat16's mantissa bits are all 0.
        if((target_val.u32 & 0xffff) != 0)
        {
            target_val.u32 |= 0x10000; // Preserve signaling NaN
        }
    }
    else
    {
#ifdef MIOPEN_USE_RNE_BFLOAT16
        // When the exponent bits are not all 1s, then the value is zero, normal,
        // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
        // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
        // This causes the bfloat16's mantissa to be incremented by 1 if the 16
        // least significant bits of the float mantissa are greater than 0x8000,
        // or if they are equal to 0x8000 and the least significant bit of the
        // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
        // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
        // has the value 0x7f, then incrementing it causes it to become 0x00 and
        // the exponent is incremented by one, which is the next higher FP value
        // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
        // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
        // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
        // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
        // incrementing it causes it to become an exponent of 0xFF and a mantissa
        // of 0x00, which is Inf, the next higher value to the unrounded value.
#ifdef __HIP_PLATFORM_HCC__
        target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
#else
        target_val.u32 +=
            (0x7fff + (target_val.ushortx2.hi & 1)); // Round to nearest, round to even
#endif // MIOPEN_BACKEND_HIP
#endif // MIOPEN_USE_RNE_BFLOAT16
    }

    // The bfloat16 result is the upper 16 bits of the (possibly rounded) word.
#ifdef __HIP_PLATFORM_HCC__
    return target_val.ushortvec[1];
#else
    return target_val.ushortx2.hi;
#endif // MIOPEN_BACKEND_HIP
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // BFLOAT16_DEVICE_HPP
|
||||
@@ -82,8 +82,8 @@ void host_convolution_forward(const Tensor<TIn>& in,
|
||||
{
|
||||
if constexpr(is_same<TIn, ushort>::value)
|
||||
{
|
||||
v += bfloat16_to_float(in(n, c, hi, wi)) *
|
||||
bfloat16_to_float(wei(k, c, y, x));
|
||||
v += ck::bf16_to_f32(in(n, c, hi, wi)) *
|
||||
ck::bf16_to_f32(wei(k, c, y, x));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -97,7 +97,7 @@ void host_convolution_forward(const Tensor<TIn>& in,
|
||||
|
||||
if constexpr(is_same<TOut, ushort>::value)
|
||||
{
|
||||
out(n, k, ho, wo) = float_to_bfloat16(v);
|
||||
out(n, k, ho, wo) = f32_to_bf16(v);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -120,8 +120,8 @@ void host_convolution_forward(const Tensor<TIn>& in,
|
||||
{
|
||||
if constexpr(is_same<TIn, ushort>::value)
|
||||
{
|
||||
v += bfloat16_to_float(in(n, hi, wi, c)) *
|
||||
bfloat16_to_float(wei(k, y, x, c));
|
||||
v += ck::bf16_to_f32(in(n, hi, wi, c)) *
|
||||
ck::bf16_to_f32(wei(k, y, x, c));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -134,7 +134,7 @@ void host_convolution_forward(const Tensor<TIn>& in,
|
||||
}
|
||||
if constexpr(is_same<TOut, ushort>::value)
|
||||
{
|
||||
out(n, ho, wo, k) = float_to_bfloat16(v);
|
||||
out(n, ho, wo, k) = f32_to_bf16(v);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -16,10 +16,10 @@ void host_gemm<ushort, ushort, ushort>(const Tensor<ushort>& a,
|
||||
|
||||
for(int k = 0; k < K; ++k)
|
||||
{
|
||||
v += bfloat16_to_float(a(m, k)) * bfloat16_to_float(b(k, n));
|
||||
v += ck::bf16_to_f32(a(m, k)) * ck::bf16_to_f32(b(k, n));
|
||||
}
|
||||
|
||||
c(m, n) = float_to_bfloat16(v);
|
||||
c(m, n) = ck::f32_to_bf16(v);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_mk_kn_mn, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
|
||||
@@ -34,10 +34,10 @@ void host_gemm<ushort, ushort, ushort>(const Tensor<ushort>& a,
|
||||
|
||||
for(int k = 0; k < K; ++k)
|
||||
{
|
||||
v += bfloat16_to_float(a(m, k)) * bfloat16_to_float(b(n, k));
|
||||
v += ck::bf16_to_f32(a(m, k)) * ck::bf16_to_f32(b(n, k));
|
||||
}
|
||||
|
||||
c(m, n) = float_to_bfloat16(v);
|
||||
c(m, n) = ck::f32_to_bf16(v);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_mk_nk_mn, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
|
||||
@@ -52,10 +52,10 @@ void host_gemm<ushort, ushort, ushort>(const Tensor<ushort>& a,
|
||||
|
||||
for(int k = 0; k < K; ++k)
|
||||
{
|
||||
v += bfloat16_to_float(a(k, m)) * bfloat16_to_float(b(k, n));
|
||||
v += ck::bf16_to_f32(a(k, m)) * ck::bf16_to_f32(b(k, n));
|
||||
}
|
||||
|
||||
c(m, n) = float_to_bfloat16(v);
|
||||
c(m, n) = ck::f32_to_bf16(v);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_km_kn_mn, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
|
||||
@@ -70,10 +70,10 @@ void host_gemm<ushort, ushort, ushort>(const Tensor<ushort>& a,
|
||||
|
||||
for(int k = 0; k < K; ++k)
|
||||
{
|
||||
v += bfloat16_to_float(a(k, m)) * bfloat16_to_float(b(n, k));
|
||||
v += ck::bf16_to_f32(a(k, m)) * ck::bf16_to_f32(b(n, k));
|
||||
}
|
||||
|
||||
c(m, n) = float_to_bfloat16(v);
|
||||
c(m, n) = ck::f32_to_bf16(v);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_km_nk_mn, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
|
||||
@@ -88,10 +88,10 @@ void host_gemm<ushort, ushort, ushort>(const Tensor<ushort>& a,
|
||||
|
||||
for(int k = 0; k < K; ++k)
|
||||
{
|
||||
v += bfloat16_to_float(a(m, k)) * bfloat16_to_float(b(k, n));
|
||||
v += ck::bf16_to_f32(a(m, k)) * ck::bf16_to_f32(b(k, n));
|
||||
}
|
||||
|
||||
c(n, m) = float_to_bfloat16(v);
|
||||
c(n, m) = ck::f32_to_bf16(v);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_mk_kn_nm, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
|
||||
@@ -106,10 +106,10 @@ void host_gemm<ushort, ushort, ushort>(const Tensor<ushort>& a,
|
||||
|
||||
for(int k = 0; k < K; ++k)
|
||||
{
|
||||
v += bfloat16_to_float(a(m, k)) * bfloat16_to_float(b(n, k));
|
||||
v += ck::bf16_to_f32(a(m, k)) * ck::bf16_to_f32(b(n, k));
|
||||
}
|
||||
|
||||
c(n, m) = float_to_bfloat16(v);
|
||||
c(n, m) = ck::f32_to_bf16(v);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_mk_nk_nm, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
|
||||
@@ -124,10 +124,10 @@ void host_gemm<ushort, ushort, ushort>(const Tensor<ushort>& a,
|
||||
|
||||
for(int k = 0; k < K; ++k)
|
||||
{
|
||||
v += bfloat16_to_float(a(k, m)) * bfloat16_to_float(b(k, n));
|
||||
v += ck::bf16_to_f32(a(k, m)) * ck::bf16_to_f32(b(k, n));
|
||||
}
|
||||
|
||||
c(n, m) = float_to_bfloat16(v);
|
||||
c(n, m) = ck::f32_to_bf16(v);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_km_kn_nm, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
|
||||
@@ -142,10 +142,10 @@ void host_gemm<ushort, ushort, ushort>(const Tensor<ushort>& a,
|
||||
|
||||
for(int k = 0; k < K; ++k)
|
||||
{
|
||||
v += bfloat16_to_float(a(k, m)) * bfloat16_to_float(b(n, k));
|
||||
v += ck::bf16_to_f32(a(k, m)) * ck::bf16_to_f32(b(n, k));
|
||||
}
|
||||
|
||||
c(n, m) = float_to_bfloat16(v);
|
||||
c(n, m) = ck::f32_to_bf16(v);
|
||||
};
|
||||
|
||||
make_ParallelTensorFunctor(f_km_nk_nm, c.mDesc.GetLengths()[0], c.mDesc.GetLengths()[1])(
|
||||
|
||||
@@ -321,18 +321,14 @@ void check_error(const Tensor<T>& ref, const Tensor<T>& result)
|
||||
std::cout << "max_diff: " << max_diff << ", " << ref_value << ", " << result_value << std::endl;
|
||||
}
|
||||
|
||||
float bf16_to_f32(ushort src_val)
|
||||
__host__ __device__ float bf16_to_f32(ushort src_val)
|
||||
{
|
||||
typedef union
|
||||
union
|
||||
{
|
||||
ushort x, y;
|
||||
float f32;
|
||||
} bf16_f32_t;
|
||||
|
||||
bf16_f32_t v;
|
||||
v.x = 0;
|
||||
v.y = src_val;
|
||||
return v.f32;
|
||||
uint32_t int32;
|
||||
float fp32;
|
||||
} u = {uint32_t(src_val) << 16};
|
||||
return u.fp32;
|
||||
}
|
||||
|
||||
template <>
|
||||
@@ -354,8 +350,7 @@ void check_error<ushort>(const Tensor<ushort>& ref, const Tensor<ushort>& result
|
||||
}
|
||||
|
||||
std::cout << "error: " << error << std::endl;
|
||||
std::cout << "max_diff: " << max_diff << ", ref: " << ref_value << ", res: " << result_value
|
||||
<< std::endl;
|
||||
std::cout << "max_diff: " << max_diff << ", " << ref_value << ", " << result_value << std::endl;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
#include <cmath>
|
||||
#include "config.hpp"
|
||||
#include "data_type.hpp"
|
||||
|
||||
template <typename T>
|
||||
struct GeneratorTensor_1
|
||||
@@ -24,7 +25,7 @@ struct GeneratorTensor_1<ushort>
|
||||
template <typename... Is>
|
||||
ushort operator()(Is...)
|
||||
{
|
||||
return float_to_bfloat16(value);
|
||||
return ck::f32_to_bf16(value);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -74,7 +75,7 @@ struct GeneratorTensor_2<ushort>
|
||||
ushort operator()(Is...)
|
||||
{
|
||||
float tmp = (std::rand() % (max_value - min_value)) + min_value;
|
||||
return float_to_bfloat16(tmp);
|
||||
return ck::f32_to_bf16(tmp);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -119,7 +120,7 @@ struct GeneratorTensor_3<ushort>
|
||||
|
||||
float fp32_tmp = min_value + tmp * (max_value - min_value);
|
||||
|
||||
return float_to_bfloat16(fp32_tmp);
|
||||
return ck::f32_to_bf16(fp32_tmp);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user