PoC: convert FNUZ fp8 to a non-native dtype, similar to OCP (#2871)

[ROCm/composable_kernel commit: e469fee046]
This commit is contained in:
Max Podkorytov
2025-09-18 22:51:01 -07:00
committed by GitHub
parent 30b63f4c04
commit a00705b4fd
7 changed files with 80 additions and 49 deletions

View File

@@ -43,9 +43,8 @@ TEST(BF8FNUZ, ConvertFP32Nearest)
type_convert<float>(f8_convert_rne<bf8_fnuz_t>(std::numeric_limits<float>::max())),
abs_tol);
// convert inf float to bf8_fnuz_t and check that the result is QuietNaN
ASSERT_NEAR(ck::NumericLimits<bf8_fnuz_t>::QuietNaN(),
f8_convert_rne<bf8_fnuz_t>(std::numeric_limits<float>::infinity()),
abs_tol);
ASSERT_EQ(ck::NumericLimits<bf8_fnuz_t>::QuietNaN(),
f8_convert_rne<bf8_fnuz_t>(std::numeric_limits<float>::infinity()));
// positive norm float value to bf8 and back, check if holds
float pos_float = 0.0000762939f;
ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_fnuz_t>(pos_float)), abs_tol);
@@ -80,9 +79,8 @@ TEST(BF8FNUZ, ConvertFP32Stochastic)
type_convert<float>(f8_convert_sr<bf8_fnuz_t>(std::numeric_limits<float>::max())),
abs_tol);
// convert inf float to bf8_fnuz_t and check that the result is QuietNaN
ASSERT_NEAR(ck::NumericLimits<bf8_fnuz_t>::QuietNaN(),
f8_convert_sr<bf8_fnuz_t>(std::numeric_limits<float>::infinity()),
abs_tol);
ASSERT_EQ(ck::NumericLimits<bf8_fnuz_t>::QuietNaN(),
f8_convert_sr<bf8_fnuz_t>(std::numeric_limits<float>::infinity()));
// positive norm float value to bf8 and back, check if holds
float pos_float = 0.0000762939f;
ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_sr<bf8_fnuz_t>(pos_float)), abs_tol);
@@ -118,9 +116,8 @@ TEST(BF8FNUZ, ConvertFP16Nearest)
type_convert<half_t>(f8_convert_rne<bf8_fnuz_t>(ck::NumericLimits<half_t>::Max())),
abs_tol);
// convert QuietNaN fp16 to bf8_fnuz_t and check if it is QuietNaN
ASSERT_NEAR(ck::NumericLimits<bf8_fnuz_t>::QuietNaN(),
f8_convert_rne<bf8_fnuz_t>(ck::NumericLimits<half_t>::QuietNaN()),
abs_tol);
ASSERT_EQ(ck::NumericLimits<bf8_fnuz_t>::QuietNaN(),
f8_convert_rne<bf8_fnuz_t>(ck::NumericLimits<half_t>::QuietNaN()));
// positive norm fp16 value to bf8 and back, check if holds
half_t pos_half = half_t{0.0000762939};
ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_fnuz_t>(pos_half)), abs_tol);
@@ -155,9 +152,8 @@ TEST(BF8FNUZ, ConvertFP16Stochastic)
type_convert<half_t>(f8_convert_sr<bf8_fnuz_t>(ck::NumericLimits<half_t>::Max())),
abs_tol);
// convert QuietNaN fp16 to bf8_fnuz_t and check if it is QuietNaN
ASSERT_NEAR(ck::NumericLimits<bf8_fnuz_t>::QuietNaN(),
f8_convert_sr<bf8_fnuz_t>(ck::NumericLimits<half_t>::QuietNaN()),
abs_tol);
ASSERT_EQ(ck::NumericLimits<bf8_fnuz_t>::QuietNaN(),
f8_convert_sr<bf8_fnuz_t>(ck::NumericLimits<half_t>::QuietNaN()));
// positive norm fp16 value to bf8 and back, check if holds
half_t pos_half = half_t{0.0000762939};
ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_sr<bf8_fnuz_t>(pos_half)), abs_tol);

View File

@@ -48,9 +48,8 @@ TEST(FP8FNUZ, ConvertFP32Nearest)
type_convert<float>(f8_convert_rne<f8_fnuz_t>(std::numeric_limits<float>::max())),
abs_tol);
// convert inf float to f8_fnuz_t and check that the result is QuietNaN
ASSERT_NEAR(ck::NumericLimits<f8_fnuz_t>::QuietNaN(),
f8_convert_rne<f8_fnuz_t>(std::numeric_limits<float>::infinity()),
abs_tol);
ASSERT_EQ(ck::NumericLimits<f8_fnuz_t>::QuietNaN(),
f8_convert_rne<f8_fnuz_t>(std::numeric_limits<float>::infinity()));
// positive norm float value to fp8 and back, check if holds
float pos_float = 0.017578125f;
ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_fnuz_t>(pos_float)), abs_tol);
@@ -85,9 +84,8 @@ TEST(FP8FNUZ, ConvertFP32Stochastic)
type_convert<float>(f8_convert_sr<f8_fnuz_t>(std::numeric_limits<float>::max())),
abs_tol);
// convert inf float to f8_fnuz_t and check that the result is QuietNaN
ASSERT_NEAR(ck::NumericLimits<f8_fnuz_t>::QuietNaN(),
f8_convert_sr<f8_fnuz_t>(std::numeric_limits<float>::infinity()),
abs_tol);
ASSERT_EQ(ck::NumericLimits<f8_fnuz_t>::QuietNaN(),
f8_convert_sr<f8_fnuz_t>(std::numeric_limits<float>::infinity()));
// positive norm float value to fp8 and back, check if holds
float pos_float = 0.017578125f;
ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_sr<f8_fnuz_t>(pos_float)), abs_tol);
@@ -122,9 +120,8 @@ TEST(FP8FNUZ, ConvertFP16Nearest)
type_convert<half_t>(f8_convert_rne<f8_fnuz_t>(ck::NumericLimits<half_t>::Max())),
abs_tol);
// convert QuietNaN fp16 to f8_fnuz_t and check if it is QuietNaN
ASSERT_NEAR(ck::NumericLimits<f8_fnuz_t>::QuietNaN(),
f8_convert_rne<f8_fnuz_t>(ck::NumericLimits<half_t>::QuietNaN()),
abs_tol);
ASSERT_EQ(ck::NumericLimits<f8_fnuz_t>::QuietNaN(),
f8_convert_rne<f8_fnuz_t>(ck::NumericLimits<half_t>::QuietNaN()));
// positive norm fp16 value to fp8 and back, check if holds
half_t pos_half = half_t{0.017578125};
ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_fnuz_t>(pos_half)), abs_tol);
@@ -159,9 +156,8 @@ TEST(FP8FNUZ, ConvertFP16Stochastic)
type_convert<half_t>(f8_convert_sr<f8_fnuz_t>(ck::NumericLimits<half_t>::Max())),
abs_tol);
// convert QuietNaN fp16 to f8_fnuz_t and check if it is QuietNaN
ASSERT_NEAR(ck::NumericLimits<f8_fnuz_t>::QuietNaN(),
f8_convert_sr<f8_fnuz_t>(ck::NumericLimits<half_t>::QuietNaN()),
abs_tol);
ASSERT_EQ(ck::NumericLimits<f8_fnuz_t>::QuietNaN(),
f8_convert_sr<f8_fnuz_t>(ck::NumericLimits<half_t>::QuietNaN()));
// positive norm fp16 value to fp8 and back, check if holds
half_t pos_half = half_t{0.017578125};
ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_sr<f8_fnuz_t>(pos_half)), abs_tol);