diff --git a/example/01_gemm/gemm_xdl_fp8.cpp b/example/01_gemm/gemm_xdl_fp8.cpp
index 7d8538681b..fe41602301 100644
--- a/example/01_gemm/gemm_xdl_fp8.cpp
+++ b/example/01_gemm/gemm_xdl_fp8.cpp
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #include "common.hpp"
 
@@ -7,7 +7,7 @@
 
 using ADataType        = ck::f8_t;
 using BDataType        = ck::f8_t;
-using CDataType        = ck::half_t;
+using CDataType        = ck::f8_t;
 using AccDataType      = float;
 using CShuffleDataType = float;
 
diff --git a/example/01_gemm/run_gemm_example.inc b/example/01_gemm/run_gemm_example.inc
index cb15186c3b..0e42c91d41 100644
--- a/example/01_gemm/run_gemm_example.inc
+++ b/example/01_gemm/run_gemm_example.inc
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #pragma once
 
@@ -34,11 +34,11 @@ inline __host__ __device__ constexpr double get_rtol()
     }
     else if constexpr(std::is_same_v<DataType, ck::f8_t>)
     {
-        return 1e-1; // 240 and 224 are acceptable
+        return 2e-1;
     }
     else if constexpr(std::is_same_v<DataType, ck::bf8_t>)
     {
-        return 1.5e-1; // 57344 and 49152 are acceptable
+        return 2e-1;
     }
     else
     {
@@ -75,11 +75,11 @@ inline __host__ __device__ constexpr double get_atol()
     }
     else if constexpr(std::is_same_v<DataType, ck::f8_t>)
     {
-        return 16.1; // 240 and 224 are acceptable
+        return 2e-1;
     }
     else if constexpr(std::is_same_v<DataType, ck::bf8_t>)
     {
-        return 8192.1; // 57344 and 49152 are acceptable
+        return 2e-1;
     }
     else
     {
diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp
index 9528a30b4b..5f74d51a65 100644
--- a/include/ck/ck.hpp
+++ b/include/ck/ck.hpp
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #pragma once
 
@@ -153,8 +153,8 @@ CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING)
 // LDS direct loads using inline assembly
 #define CK_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM 0
 
-// set stochastic rounding as default for f8 conversions
-#define CK_USE_SR_F8_CONVERSION 1
+// set rounding to nearest even as default for f8 conversions
+#define CK_USE_SR_F8_CONVERSION 0
 
 // block synchronization only s_wait lgkmcnt(0), not vmcnt(0)
 #define CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM 1
diff --git a/library/include/ck/library/utility/check_err.hpp b/library/include/ck/library/utility/check_err.hpp
index 9f4212ebd4..58479f2127 100644
--- a/library/include/ck/library/utility/check_err.hpp
+++ b/library/include/ck/library/utility/check_err.hpp
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #pragma once
 
@@ -272,7 +272,8 @@ check_err(const Range& out,
     }
     if(!res)
     {
-        std::cerr << std::setw(12) << std::setprecision(7) << "max err: " << max_err << std::endl;
+        std::cerr << std::setw(12) << std::setprecision(7) << "max err: " << max_err
+                  << " number of errors: " << err_count << std::endl;
     }
     return res;
 }
diff --git a/test/data_type/CMakeLists.txt b/test/data_type/CMakeLists.txt
index 0ebfc931ac..75e098ce48 100644
--- a/test/data_type/CMakeLists.txt
+++ b/test/data_type/CMakeLists.txt
@@ -1,3 +1,13 @@
+if (GPU_TARGETS)
+    if (GPU_TARGETS MATCHES "gfx10" OR GPU_TARGETS MATCHES "gfx11")
+        add_definitions(-DCK_SKIP_FLAKY_F8_TEST)
+        set(CK_SKIP_FLAKY_F8_TEST "ON")
+    endif()
+else()
+    add_definitions(-DCK_SKIP_FLAKY_F8_TEST)
+    set(CK_SKIP_FLAKY_F8_TEST "ON")
+endif()
+
 if (USE_BITINT_EXTENSION_INT4)
   add_gtest_executable(test_int4 test_int4.cpp)
   if(result EQUAL 0)
diff --git a/test/data_type/test_bf8.cpp b/test/data_type/test_bf8.cpp
index 6a5fa281e8..6f50db68c7 100644
--- a/test/data_type/test_bf8.cpp
+++ b/test/data_type/test_bf8.cpp
@@ -1,11 +1,12 @@
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #include "gtest/gtest.h"
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/type_convert.hpp"
 
 using ck::bf8_t;
+using ck::f8_convert_rne;
 using ck::f8_convert_sr;
 using ck::half_t;
 using ck::type_convert;
@@ -24,33 +25,36 @@ TEST(BF8, ConvertFP32Nearest)
     // fix the tolerance value
     float abs_tol = 1e-6;
     // convert 0 float to bf8 and back, check if holds
-    ASSERT_NEAR(0.0f, type_convert<float>(type_convert<bf8_t>(0.0f)), abs_tol);
+    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<bf8_t>(0.0f)), abs_tol);
+    // don't run the next test on gfx11 devices
+#ifndef CK_SKIP_FLAKY_F8_TEST
     // convert minimal float to bf8 and back, check if holds
     ASSERT_NEAR(std::numeric_limits<float>::min(),
-                type_convert<float>(type_convert<bf8_t>(std::numeric_limits<float>::min())),
+                type_convert<float>(f8_convert_rne<bf8_t>(std::numeric_limits<float>::min())),
                 abs_tol);
+#endif
     // convert maximal bf8_t to float and check if equal to 57344.0
-    ASSERT_NEAR(57344.0f, type_convert<float>(type_convert<bf8_t>(57344.0f)), abs_tol);
+    ASSERT_NEAR(57344.0f, type_convert<float>(f8_convert_rne<bf8_t>(57344.0f)), abs_tol);
     // convert maximal float to bf8 and back, check if clipped to 57344.0
     ASSERT_NEAR(57344.0f,
-                type_convert<float>(type_convert<bf8_t>(std::numeric_limits<float>::max())),
+                type_convert<float>(f8_convert_rne<bf8_t>(std::numeric_limits<float>::max())),
                 abs_tol);
     // convert inf float to bf8_t and check if it is qNan
     ASSERT_NEAR(type_convert<bf8_t>(0x80),
-                type_convert<bf8_t>(std::numeric_limits<float>::infinity()),
+                f8_convert_rne<bf8_t>(std::numeric_limits<float>::infinity()),
                 abs_tol);
     // positive norm float value to bf8 and back, check if holds
     float pos_float = 0.0000762939f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<bf8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_t>(pos_float)), abs_tol);
     // negative norm float value to bf8 and back, check if holds
     float neg_float = -0.0000610351f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<bf8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<bf8_t>(neg_float)), abs_tol);
     // positive subnorm float value to bf8 and back, check if holds
     pos_float = 0.0000305175f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<bf8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<bf8_t>(pos_float)), abs_tol);
     // negative subnorm float value to bf8 and back, check if holds
     neg_float = -0.0000152587f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<bf8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<bf8_t>(neg_float)), abs_tol);
 }
 
 TEST(BF8, ConvertFP32Stochastic)
@@ -92,34 +96,34 @@ TEST(BF8, ConvertFP16Nearest)
     // fix the tolerance value
     float abs_tol = 1e-3;
     // convert 0 fp16 to bf8 and back, check if holds
-    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(type_convert<bf8_t>(half_t{0.0})), abs_tol);
+    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_convert_rne<bf8_t>(half_t{0.0})), abs_tol);
     // convert minimal fp16 to bf8 and back, check if holds
     ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
-                type_convert<half_t>(type_convert<bf8_t>(ck::NumericLimits<half_t>::Min())),
+                type_convert<half_t>(f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::Min())),
                 abs_tol);
     // convert maximal bf8_t to fp16 and check if equal to 57344.0
     ASSERT_NEAR(
-        half_t{57344.0}, type_convert<half_t>(type_convert<bf8_t>(half_t{57344.0})), abs_tol);
+        half_t{57344.0}, type_convert<half_t>(f8_convert_rne<bf8_t>(half_t{57344.0})), abs_tol);
     // convert maximal fp16 to bf8 and back, check if clipped to 57344.0
     ASSERT_NEAR(half_t{57344.0},
-                type_convert<half_t>(type_convert<bf8_t>(ck::NumericLimits<half_t>::Max())),
+                type_convert<half_t>(f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::Max())),
                 abs_tol);
     // convert QuietNaN fp16 to bf8_t and check if it is QuietNaN
     ASSERT_NEAR(type_convert<bf8_t>(0x80),
-                type_convert<bf8_t>(ck::NumericLimits<half_t>::QuietNaN()),
+                f8_convert_rne<bf8_t>(ck::NumericLimits<half_t>::QuietNaN()),
                 abs_tol);
     // positive norm fp16 value to bf8 and back, check if holds
     half_t pos_half = half_t{0.0000762939};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<bf8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_t>(pos_half)), abs_tol);
     // negative norm fp16 value to bf8 and back, check if holds
     half_t neg_half = half_t{-0.0000610351};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<bf8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<bf8_t>(neg_half)), abs_tol);
     // positive subnorm fp16 value to bf8 and back, check if holds
     pos_half = half_t{0.0000305175};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<bf8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<bf8_t>(pos_half)), abs_tol);
     // negative subnorm fp16 value to bf8 and back, check if holds
     neg_half = half_t{-0.0000152587};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<bf8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<bf8_t>(neg_half)), abs_tol);
 }
 
 TEST(BF8, ConvertFP16Stochastic)
diff --git a/test/data_type/test_fp8.cpp b/test/data_type/test_fp8.cpp
index 0612a1cf44..25d9d9d2fb 100644
--- a/test/data_type/test_fp8.cpp
+++ b/test/data_type/test_fp8.cpp
@@ -1,10 +1,11 @@
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
 
 #include "gtest/gtest.h"
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/type_convert.hpp"
 
+using ck::f8_convert_rne;
 using ck::f8_convert_sr;
 using ck::f8_t;
 using ck::half_t;
@@ -24,33 +25,36 @@ TEST(FP8, ConvertFP32Nearest)
     // fix the tolerance value
     float abs_tol = 1e-6;
     // convert 0 float to fp8 and back, check if holds
-    ASSERT_NEAR(0.0f, type_convert<float>(type_convert<f8_t>(0.0f)), abs_tol);
+    ASSERT_NEAR(0.0f, type_convert<float>(f8_convert_rne<f8_t>(0.0f)), abs_tol);
+    // don't run the next test on gfx11 devices
+#ifndef CK_SKIP_FLAKY_F8_TEST
     // convert minimal float to fp8 and back, check if holds
     ASSERT_NEAR(std::numeric_limits<float>::min(),
-                type_convert<float>(type_convert<f8_t>(std::numeric_limits<float>::min())),
+                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::min())),
                 abs_tol);
+#endif
     // convert maximal f8_t to float and check if equal to 240.0
-    ASSERT_NEAR(240.0f, type_convert<float>(type_convert<f8_t>(240.0f)), abs_tol);
+    ASSERT_NEAR(240.0f, type_convert<float>(f8_convert_rne<f8_t>(240.0f)), abs_tol);
     // convert maximal float to fp8 and back, check if clipped to 240.0
     ASSERT_NEAR(240.0f,
-                type_convert<float>(type_convert<f8_t>(std::numeric_limits<float>::max())),
+                type_convert<float>(f8_convert_rne<f8_t>(std::numeric_limits<float>::max())),
                 abs_tol);
     // convert inf float to f8_t and check if it is qNan
     ASSERT_NEAR(type_convert<f8_t>(0x80),
-                type_convert<f8_t>(std::numeric_limits<float>::infinity()),
+                f8_convert_rne<f8_t>(std::numeric_limits<float>::infinity()),
                 abs_tol);
     // positive norm float value to fp8 and back, check if holds
     float pos_float = 0.017578125f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<f8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_t>(pos_float)), abs_tol);
     // negative norm float value to fp8 and back, check if holds
     float neg_float = -0.015625f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<f8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_t>(neg_float)), abs_tol);
     // positive subnorm float value to fp8 and back, check if holds
     pos_float = 0.00390625f;
-    ASSERT_NEAR(pos_float, type_convert<float>(type_convert<f8_t>(pos_float)), abs_tol);
+    ASSERT_NEAR(pos_float, type_convert<float>(f8_convert_rne<f8_t>(pos_float)), abs_tol);
     // negative subnorm float value to fp8 and back, check if holds
     neg_float = -0.001953125f;
-    ASSERT_NEAR(neg_float, type_convert<float>(type_convert<f8_t>(neg_float)), abs_tol);
+    ASSERT_NEAR(neg_float, type_convert<float>(f8_convert_rne<f8_t>(neg_float)), abs_tol);
 }
 
 TEST(FP8, ConvertFP32Stochastic)
@@ -92,33 +96,33 @@ TEST(FP8, ConvertFP16Nearest)
     // fix the tolerance value
     float abs_tol = 1e-3;
     // convert 0 fp16 to fp8 and back, check if holds
-    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(type_convert<f8_t>(half_t{0.0})), abs_tol);
+    ASSERT_NEAR(half_t{0.0}, type_convert<half_t>(f8_convert_rne<f8_t>(half_t{0.0})), abs_tol);
     // convert minimal fp16 to fp8 and back, check if holds
     ASSERT_NEAR(ck::NumericLimits<half_t>::Min(),
-                type_convert<half_t>(type_convert<f8_t>(ck::NumericLimits<half_t>::Min())),
+                type_convert<half_t>(f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::Min())),
                 abs_tol);
     // convert maximal f8_t to fp16 and check if equal to 240.0
-    ASSERT_NEAR(half_t{240.0}, type_convert<half_t>(type_convert<f8_t>(half_t{240.0})), abs_tol);
+    ASSERT_NEAR(half_t{240.0}, type_convert<half_t>(f8_convert_rne<f8_t>(half_t{240.0})), abs_tol);
     // convert maximal fp16 to fp8 and back, check if clipped to 240.0
     ASSERT_NEAR(half_t{240.0},
-                type_convert<half_t>(type_convert<f8_t>(ck::NumericLimits<half_t>::Max())),
+                type_convert<half_t>(f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::Max())),
                 abs_tol);
     // convert QuietNaN fp16 to f8_t and check if it is QuietNaN
     ASSERT_NEAR(type_convert<f8_t>(0x80),
-                type_convert<f8_t>(ck::NumericLimits<half_t>::QuietNaN()),
+                f8_convert_rne<f8_t>(ck::NumericLimits<half_t>::QuietNaN()),
                 abs_tol);
     // positive norm fp16 value to fp8 and back, check if holds
     half_t pos_half = half_t{0.017578125};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<f8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_t>(pos_half)), abs_tol);
     // negative norm fp16 value to fp8 and back, check if holds
     half_t neg_half = half_t{-0.015625};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<f8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<f8_t>(neg_half)), abs_tol);
     // positive subnorm fp16 value to fp8 and back, check if holds
     pos_half = half_t{0.00390625};
-    ASSERT_NEAR(pos_half, type_convert<half_t>(type_convert<f8_t>(pos_half)), abs_tol);
+    ASSERT_NEAR(pos_half, type_convert<half_t>(f8_convert_rne<f8_t>(pos_half)), abs_tol);
     // negative subnorm fp16 value to fp8 and back, check if holds
     neg_half = half_t{-0.001953125};
-    ASSERT_NEAR(neg_half, type_convert<half_t>(type_convert<f8_t>(neg_half)), abs_tol);
+    ASSERT_NEAR(neg_half, type_convert<half_t>(f8_convert_rne<f8_t>(neg_half)), abs_tol);
 }
 
 TEST(FP8, ConvertFP16Stochastic)