mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
Conv:TF32: add more instances - 2 (#2879)
* add instances of device_grouped_conv_fwd_xdl_f32_comp_instances
* add instances of device_grouped_conv_fwd_xdl_f32_tf32_mem_instances
* add instances of device_grouped_conv_fwd_xdl_large_tensor_f32_tf32_instances
* tf32:conv:add instances for base class DeviceConvFwd
* tf32:conv:add instances for base class DeviceGroupedConvBwdDataMultipleD
* tf32:conv:add instances for base class DeviceGroupedConvBwdWeight
* add tf32 in profiler
* remove gnhwc/ngchw/ngcdhw instances
* remove non-ndhwgc/nhwgc/nhwc instances
* add check in IsSupportedArgument()
[ROCm/composable_kernel commit: fada1a3cae]
This commit is contained in:
@@ -31,13 +31,15 @@ double get_relative_threshold(const int number_of_accumulations = 1)
|
||||
using F16 = ck::half_t;
|
||||
using BF16 = ck::bhalf_t;
|
||||
using F32 = float;
|
||||
using TF32 = ck::tf32_t;
|
||||
using I8 = int8_t;
|
||||
using I32 = int32_t;
|
||||
|
||||
static_assert(is_same_v<ComputeDataType, F4> || is_same_v<ComputeDataType, F8> ||
|
||||
is_same_v<ComputeDataType, F16> || is_same_v<ComputeDataType, BF16> ||
|
||||
is_same_v<ComputeDataType, F32> || is_same_v<ComputeDataType, I8> ||
|
||||
is_same_v<ComputeDataType, I32> || is_same_v<ComputeDataType, int>,
|
||||
is_same_v<ComputeDataType, F32> || is_same_v<ComputeDataType, TF32> ||
|
||||
is_same_v<ComputeDataType, I8> || is_same_v<ComputeDataType, I32> ||
|
||||
is_same_v<ComputeDataType, int>,
|
||||
"Warning: Unhandled ComputeDataType for setting up the relative threshold!");
|
||||
double compute_error = 0;
|
||||
if constexpr(is_same_v<ComputeDataType, I8> || is_same_v<ComputeDataType, I32> ||
|
||||
@@ -52,8 +54,9 @@ double get_relative_threshold(const int number_of_accumulations = 1)
|
||||
|
||||
static_assert(is_same_v<OutDataType, F4> || is_same_v<OutDataType, F8> ||
|
||||
is_same_v<OutDataType, F16> || is_same_v<OutDataType, BF16> ||
|
||||
is_same_v<OutDataType, F32> || is_same_v<OutDataType, I8> ||
|
||||
is_same_v<OutDataType, I32> || is_same_v<OutDataType, int>,
|
||||
is_same_v<OutDataType, F32> || is_same_v<ComputeDataType, TF32> ||
|
||||
is_same_v<OutDataType, I8> || is_same_v<OutDataType, I32> ||
|
||||
is_same_v<OutDataType, int>,
|
||||
"Warning: Unhandled OutDataType for setting up the relative threshold!");
|
||||
double output_error = 0;
|
||||
if constexpr(is_same_v<OutDataType, I8> || is_same_v<OutDataType, I32> ||
|
||||
@@ -69,8 +72,9 @@ double get_relative_threshold(const int number_of_accumulations = 1)
|
||||
|
||||
static_assert(is_same_v<AccDataType, F4> || is_same_v<AccDataType, F8> ||
|
||||
is_same_v<AccDataType, F16> || is_same_v<AccDataType, BF16> ||
|
||||
is_same_v<AccDataType, F32> || is_same_v<AccDataType, I8> ||
|
||||
is_same_v<AccDataType, I32> || is_same_v<AccDataType, int>,
|
||||
is_same_v<AccDataType, F32> || is_same_v<ComputeDataType, TF32> ||
|
||||
is_same_v<AccDataType, I8> || is_same_v<AccDataType, I32> ||
|
||||
is_same_v<AccDataType, int>,
|
||||
"Warning: Unhandled AccDataType for setting up the relative threshold!");
|
||||
double acc_error = 0;
|
||||
if constexpr(is_same_v<AccDataType, I8> || is_same_v<AccDataType, I32> ||
|
||||
@@ -93,13 +97,15 @@ double get_absolute_threshold(const double max_possible_num, const int number_of
|
||||
using F16 = ck::half_t;
|
||||
using BF16 = ck::bhalf_t;
|
||||
using F32 = float;
|
||||
using TF32 = ck::tf32_t;
|
||||
using I8 = int8_t;
|
||||
using I32 = int32_t;
|
||||
|
||||
static_assert(is_same_v<ComputeDataType, F4> || is_same_v<ComputeDataType, F8> ||
|
||||
is_same_v<ComputeDataType, F16> || is_same_v<ComputeDataType, BF16> ||
|
||||
is_same_v<ComputeDataType, F32> || is_same_v<ComputeDataType, I8> ||
|
||||
is_same_v<ComputeDataType, I32> || is_same_v<ComputeDataType, int>,
|
||||
is_same_v<ComputeDataType, F32> || is_same_v<ComputeDataType, TF32> ||
|
||||
is_same_v<ComputeDataType, I8> || is_same_v<ComputeDataType, I32> ||
|
||||
is_same_v<ComputeDataType, int>,
|
||||
"Warning: Unhandled ComputeDataType for setting up the absolute threshold!");
|
||||
auto expo = std::log2(std::abs(max_possible_num));
|
||||
double compute_error = 0;
|
||||
@@ -115,8 +121,9 @@ double get_absolute_threshold(const double max_possible_num, const int number_of
|
||||
|
||||
static_assert(is_same_v<OutDataType, F4> || is_same_v<OutDataType, F8> ||
|
||||
is_same_v<OutDataType, F16> || is_same_v<OutDataType, BF16> ||
|
||||
is_same_v<OutDataType, F32> || is_same_v<OutDataType, I8> ||
|
||||
is_same_v<OutDataType, I32> || is_same_v<OutDataType, int>,
|
||||
is_same_v<OutDataType, F32> || is_same_v<ComputeDataType, TF32> ||
|
||||
is_same_v<OutDataType, I8> || is_same_v<OutDataType, I32> ||
|
||||
is_same_v<OutDataType, int>,
|
||||
"Warning: Unhandled OutDataType for setting up the absolute threshold!");
|
||||
double output_error = 0;
|
||||
if constexpr(is_same_v<OutDataType, I8> || is_same_v<OutDataType, I32> ||
|
||||
@@ -132,8 +139,9 @@ double get_absolute_threshold(const double max_possible_num, const int number_of
|
||||
|
||||
static_assert(is_same_v<AccDataType, F4> || is_same_v<AccDataType, F8> ||
|
||||
is_same_v<AccDataType, F16> || is_same_v<AccDataType, BF16> ||
|
||||
is_same_v<AccDataType, F32> || is_same_v<AccDataType, I8> ||
|
||||
is_same_v<AccDataType, I32> || is_same_v<AccDataType, int>,
|
||||
is_same_v<AccDataType, F32> || is_same_v<ComputeDataType, TF32> ||
|
||||
is_same_v<AccDataType, I8> || is_same_v<AccDataType, I32> ||
|
||||
is_same_v<AccDataType, int>,
|
||||
"Warning: Unhandled AccDataType for setting up the absolute threshold!");
|
||||
double acc_error = 0;
|
||||
if constexpr(is_same_v<AccDataType, I8> || is_same_v<AccDataType, I32> ||
|
||||
@@ -149,11 +157,67 @@ double get_absolute_threshold(const double max_possible_num, const int number_of
|
||||
return std::max(acc_error, midway_error);
|
||||
}
|
||||
|
||||
template <typename Range, typename RefRange>
|
||||
template <typename Range,
|
||||
typename RefRange,
|
||||
typename ComputeDataType = ranges::range_value_t<Range>>
|
||||
typename std::enable_if<
|
||||
std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
|
||||
std::is_same_v<ranges::range_value_t<Range>, float> &&
|
||||
std::is_same_v<ComputeDataType, ck::tf32_t>,
|
||||
bool>::type
|
||||
check_err(const Range& out,
|
||||
const RefRange& ref,
|
||||
const std::string& msg = "Error: Incorrect results!",
|
||||
double rtol = 1e-5,
|
||||
double atol = 3e-5)
|
||||
{
|
||||
if(out.size() != ref.size())
|
||||
{
|
||||
std::cerr << msg << " out.size() != ref.size(), :" << out.size() << " != " << ref.size()
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool res{true};
|
||||
int err_count = 0;
|
||||
double err = 0;
|
||||
double max_err = std::numeric_limits<double>::min();
|
||||
for(std::size_t i = 0; i < ref.size(); ++i)
|
||||
{
|
||||
const double o = *std::next(std::begin(out), i);
|
||||
const double r = *std::next(std::begin(ref), i);
|
||||
err = std::abs(o - r);
|
||||
if(err > atol + rtol * std::abs(r) || !std::isfinite(o) || !std::isfinite(r))
|
||||
{
|
||||
max_err = err > max_err ? err : max_err;
|
||||
if(err_count < 5)
|
||||
{
|
||||
std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i
|
||||
<< "] != ref[" << i << "]: " << o << " != " << r << std::endl;
|
||||
}
|
||||
res = false;
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
if(!res)
|
||||
{
|
||||
const float error_percent =
|
||||
static_cast<float>(err_count) / static_cast<float>(out.size()) * 100.f;
|
||||
std::cerr << "max err: " << max_err;
|
||||
std::cerr << ", number of errors: " << err_count;
|
||||
std::cerr << ", " << error_percent << "% wrong values" << std::endl;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename Range,
|
||||
typename RefRange,
|
||||
typename ComputeDataType = ranges::range_value_t<Range>>
|
||||
typename std::enable_if<
|
||||
std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
|
||||
std::is_floating_point_v<ranges::range_value_t<Range>> &&
|
||||
!std::is_same_v<ranges::range_value_t<Range>, half_t>,
|
||||
!std::is_same_v<ranges::range_value_t<Range>, half_t> &&
|
||||
!std::is_same_v<ComputeDataType, ck::tf32_t>,
|
||||
bool>::type
|
||||
check_err(const Range& out,
|
||||
const RefRange& ref,
|
||||
@@ -200,7 +264,9 @@ check_err(const Range& out,
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename Range, typename RefRange>
|
||||
template <typename Range,
|
||||
typename RefRange,
|
||||
typename ComputeDataType = ranges::range_value_t<Range>>
|
||||
typename std::enable_if<
|
||||
std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
|
||||
std::is_same_v<ranges::range_value_t<Range>, bhalf_t>,
|
||||
@@ -251,7 +317,9 @@ check_err(const Range& out,
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename Range, typename RefRange>
|
||||
template <typename Range,
|
||||
typename RefRange,
|
||||
typename ComputeDataType = ranges::range_value_t<Range>>
|
||||
typename std::enable_if<
|
||||
std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
|
||||
std::is_same_v<ranges::range_value_t<Range>, half_t>,
|
||||
@@ -301,7 +369,9 @@ check_err(const Range& out,
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename Range, typename RefRange>
|
||||
template <typename Range,
|
||||
typename RefRange,
|
||||
typename ComputeDataType = ranges::range_value_t<Range>>
|
||||
std::enable_if_t<(std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
|
||||
std::is_integral_v<ranges::range_value_t<Range>> &&
|
||||
!std::is_same_v<ranges::range_value_t<Range>, bhalf_t> &&
|
||||
@@ -358,7 +428,9 @@ check_err(const Range& out,
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename Range, typename RefRange>
|
||||
template <typename Range,
|
||||
typename RefRange,
|
||||
typename ComputeDataType = ranges::range_value_t<Range>>
|
||||
std::enable_if_t<(std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
|
||||
std::is_same_v<ranges::range_value_t<Range>, f8_t>),
|
||||
bool>
|
||||
@@ -407,7 +479,9 @@ check_err(const Range& out,
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename Range, typename RefRange>
|
||||
template <typename Range,
|
||||
typename RefRange,
|
||||
typename ComputeDataType = ranges::range_value_t<Range>>
|
||||
std::enable_if_t<(std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
|
||||
std::is_same_v<ranges::range_value_t<Range>, bf8_t>),
|
||||
bool>
|
||||
@@ -452,7 +526,9 @@ check_err(const Range& out,
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename Range, typename RefRange>
|
||||
template <typename Range,
|
||||
typename RefRange,
|
||||
typename ComputeDataType = ranges::range_value_t<Range>>
|
||||
std::enable_if_t<(std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
|
||||
std::is_same_v<ranges::range_value_t<Range>, f4_t>),
|
||||
bool>
|
||||
|
||||
@@ -1499,6 +1499,22 @@ struct DeviceGroupedConvBwdDataMultipleD_Xdl_CShuffle_v1
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(is_same_v<AComputeType, ck::tf32_t> || is_same_v<BComputeType, ck::tf32_t>)
|
||||
{
|
||||
if(!is_tf32_supported())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(!is_same_v<AComputeType, BComputeType>)
|
||||
{
|
||||
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
||||
{
|
||||
std::cout << "ComputeDataType for A and B should be same while using TF32"
|
||||
<< std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr(!IsSplitKSupported)
|
||||
{
|
||||
|
||||
@@ -951,6 +951,22 @@ struct DeviceGroupedConvBwdWeightMultipleD_Xdl_CShuffle
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(is_same_v<ComputeTypeA, ck::tf32_t> || is_same_v<ComputeTypeB, ck::tf32_t>)
|
||||
{
|
||||
if(!is_tf32_supported())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(!is_same_v<ComputeTypeA, ComputeTypeB>)
|
||||
{
|
||||
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
||||
{
|
||||
std::cout << "ComputeDataType for A and B should be same while using TF32"
|
||||
<< std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if constexpr(NDimSpatial == 1)
|
||||
{
|
||||
if constexpr(!is_GNWC_GKXC_GNWK<InLayout, WeiLayout, OutLayout>())
|
||||
|
||||
@@ -1687,6 +1687,23 @@ struct DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle
|
||||
const index_t GemmK =
|
||||
arg.a_grid_desc_k0_m_k1_.GetLength(I0) * arg.a_grid_desc_k0_m_k1_.GetLength(I2);
|
||||
|
||||
if constexpr(is_same_v<ComputeTypeA, ck::tf32_t> || is_same_v<ComputeTypeB, ck::tf32_t>)
|
||||
{
|
||||
if(!is_tf32_supported())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(!is_same_v<ComputeTypeA, ComputeTypeB>)
|
||||
{
|
||||
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
||||
{
|
||||
std::cout << "ComputeDataType for A and B should be same while using TF32"
|
||||
<< std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if(get_warp_size() == 64)
|
||||
{
|
||||
if constexpr(NXdlPerWave64 > 0)
|
||||
|
||||
@@ -950,6 +950,22 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffle
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(is_same_v<ComputeTypeA, ck::tf32_t> || is_same_v<ComputeTypeB, ck::tf32_t>)
|
||||
{
|
||||
if(!is_tf32_supported())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(!is_same_v<ComputeTypeA, ComputeTypeB>)
|
||||
{
|
||||
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
||||
{
|
||||
std::cout << "ComputeDataType for A and B should be same while using TF32"
|
||||
<< std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if constexpr(NDimSpatial == 1)
|
||||
{
|
||||
if constexpr(!is_GNWC_GKXC_GNWK<InLayout, WeiLayout, OutLayout>())
|
||||
|
||||
@@ -1289,6 +1289,23 @@ struct DeviceGroupedConvBwdWeight_Xdl_CShuffleV3
|
||||
const index_t GemmK = arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I0) *
|
||||
arg.a_grid_desc_kbatch_k0_m_k1_.GetLength(I2);
|
||||
|
||||
if constexpr(is_same_v<ComputeTypeA, ck::tf32_t> || is_same_v<ComputeTypeB, ck::tf32_t>)
|
||||
{
|
||||
if(!is_tf32_supported())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(!is_same_v<ComputeTypeA, ComputeTypeB>)
|
||||
{
|
||||
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
||||
{
|
||||
std::cout << "ComputeDataType for A and B should be same while using TF32"
|
||||
<< std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if(get_warp_size() == 64)
|
||||
{
|
||||
if constexpr(NXdlPerWave64 > 0)
|
||||
|
||||
@@ -1399,6 +1399,25 @@ struct DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if constexpr(is_same_v<AComputeDataType, ck::tf32_t> ||
|
||||
is_same_v<BComputeDataType, ck::tf32_t>)
|
||||
{
|
||||
if(!is_tf32_supported())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(!is_same_v<AComputeDataType, BComputeDataType>)
|
||||
{
|
||||
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
||||
{
|
||||
std::cout << "ComputeDataType for A and B should be same while using TF32"
|
||||
<< std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// check ConvolutionForwardSpecialization
|
||||
if constexpr(ConvForwardSpecialization ==
|
||||
ConvolutionForwardSpecialization::Filter1x1Stride1Pad0)
|
||||
|
||||
@@ -820,6 +820,23 @@ struct DeviceGroupedConvFwdMultipleD_Xdl_CShuffle_Large_Tensor
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(is_same_v<AComputeDataType, ck::tf32_t> ||
|
||||
is_same_v<BComputeDataType, ck::tf32_t>)
|
||||
{
|
||||
if(!is_tf32_supported())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if constexpr(!is_same_v<AComputeDataType, BComputeDataType>)
|
||||
{
|
||||
if(ck::EnvIsEnabled(CK_ENV(CK_LOGGING)))
|
||||
{
|
||||
std::cout << "ComputeDataType for A and B should be same while using TF32"
|
||||
<< std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// check ConvolutionForwardSpecialization
|
||||
if constexpr(ConvForwardSpecialization ==
|
||||
ConvolutionForwardSpecialization::Filter1x1Stride1Pad0)
|
||||
|
||||
@@ -280,8 +280,8 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight
|
||||
using FloatBAdjusted =
|
||||
conditional_t<is_same_v<ComputeTypeB, ck::half_t>, ck::bhalf_t, ComputeTypeB>;
|
||||
#else
|
||||
using FloatAAdjusted = ComputeTypeA;
|
||||
using FloatBAdjusted = ComputeTypeB;
|
||||
using FloatAAdjusted = conditional_t<is_same_v<ComputeTypeA, ck::tf32_t>, float, ComputeTypeA>;
|
||||
using FloatBAdjusted = conditional_t<is_same_v<ComputeTypeB, ck::tf32_t>, float, ComputeTypeB>;
|
||||
#endif
|
||||
|
||||
// M0/M1/M1Padding
|
||||
@@ -760,19 +760,19 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight
|
||||
// register
|
||||
// sanity check
|
||||
constexpr bool is_single_rate_mfma =
|
||||
(((is_same<FloatAAdjusted, half_t>::value || is_same<FloatAAdjusted, bhalf_t>::value) &&
|
||||
(((is_same<ComputeTypeA, half_t>::value || is_same<ComputeTypeA, bhalf_t>::value) &&
|
||||
K1 <= 4) ||
|
||||
(is_same<FloatAAdjusted, int8_t>::value && K1 <= 8) ||
|
||||
((is_same<FloatAAdjusted, f8_t>::value || is_same<FloatAAdjusted, bf8_t>::value) &&
|
||||
(is_same<ComputeTypeA, int8_t>::value && K1 <= 8) ||
|
||||
((is_same<ComputeTypeA, f8_t>::value || is_same<ComputeTypeA, bf8_t>::value) &&
|
||||
K1 < 32))
|
||||
? true
|
||||
: false;
|
||||
constexpr auto is_scale_mfma = false;
|
||||
constexpr index_t KPack = math::max(K1,
|
||||
MfmaSelector<FloatAAdjusted,
|
||||
MfmaSelector<ComputeTypeA,
|
||||
MPerXdl,
|
||||
NPerXdl,
|
||||
FloatBAdjusted,
|
||||
ComputeTypeB,
|
||||
is_single_rate_mfma,
|
||||
is_scale_mfma>::selected_mfma.k_per_blk);
|
||||
|
||||
@@ -787,7 +787,9 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight
|
||||
NPerXdl,
|
||||
MRepeat,
|
||||
NRepeat,
|
||||
KPack>{};
|
||||
KPack,
|
||||
ComputeTypeA,
|
||||
ComputeTypeB>{};
|
||||
|
||||
auto c_thread_buf = blockwise_gemm.GetCThreadBuffer();
|
||||
|
||||
|
||||
@@ -45,6 +45,24 @@ struct NumericUtils<float>
|
||||
using bitwise_type = uint32_t;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct NumericUtils<ck::tf32_t>
|
||||
{
|
||||
static constexpr int exp = 8;
|
||||
static constexpr int mant = 10;
|
||||
static constexpr int bias = 127;
|
||||
static constexpr uint32_t nan_mask = 0x7F800000;
|
||||
static constexpr uint32_t head_mask = 0xFF800000;
|
||||
static constexpr uint32_t mant_mask = 0x7FFFFF;
|
||||
static constexpr uint32_t exp_mask = 0xFF;
|
||||
static constexpr uint32_t Inf = 0x7F800000;
|
||||
static constexpr uint32_t NegInf = 0xFF800000;
|
||||
static constexpr uint32_t NaN = 0x7F800001;
|
||||
static constexpr uint32_t Neg0 = 0x80000000;
|
||||
static constexpr bool has_inf = true;
|
||||
using bitwise_type = uint32_t;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct NumericUtils<half_t>
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user