diff --git a/example/27_layernorm/run_layernorm_example.inc b/example/27_layernorm/run_layernorm_example.inc
index c8f599a39c..399165c36e 100644
--- a/example/27_layernorm/run_layernorm_example.inc
+++ b/example/27_layernorm/run_layernorm_example.inc
@@ -8,8 +8,8 @@ int run_groupnorm_example()
 {
     bool time_kernel = false;
 
-    ck::index_t M      = 1024;
-    ck::index_t N      = 1024;
+    ck::index_t M = 1024;
+    ck::index_t N = 1024;
 
     Tensor<XDataType> x({M, N});
     Tensor<GammaDataType> gamma({N});
@@ -44,9 +44,9 @@ int run_groupnorm_example()
         {0, 1},
         std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
         std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
-                                    save_mean.mDesc.GetStrides().end()},
+                                 save_mean.mDesc.GetStrides().end()},
         std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
-                                    save_mean.mDesc.GetStrides().end()},
+                                 save_mean.mDesc.GetStrides().end()},
         {1},
         1e-4,
         x_dev.GetDeviceBuffer(),
diff --git a/example/42_groupnorm/run_groupnorm_example.inc b/example/42_groupnorm/run_groupnorm_example.inc
index da41e90639..89117a9b94 100644
--- a/example/42_groupnorm/run_groupnorm_example.inc
+++ b/example/42_groupnorm/run_groupnorm_example.inc
@@ -65,9 +65,9 @@ int run_groupnorm_example(int argc, char* argv[])
         {0, 0, 0, C, 1},
         std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
         std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
-                                    save_mean.mDesc.GetStrides().end()},
+                                 save_mean.mDesc.GetStrides().end()},
         std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
-                                    save_mean.mDesc.GetStrides().end()},
+                                 save_mean.mDesc.GetStrides().end()},
         {1, 2, 4}, // reduction dimension: [H, W, C]
         1e-6,
         x_dev.GetDeviceBuffer(),
diff --git a/include/ck/utility/type_convert.hpp b/include/ck/utility/type_convert.hpp
index 12d628a4bb..89b7dc5789 100644
--- a/include/ck/utility/type_convert.hpp
+++ b/include/ck/utility/type_convert.hpp
@@ -100,6 +100,8 @@ template <>
 inline __host__ __device__ f8_t type_convert<f8_t, float>(float x)
 {
 #if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
+    float max_fp8 = 240.0f;
+    x             = x > max_fp8 ? max_fp8 : (x < -max_fp8 ? -max_fp8 : x);
     union
     {
         float fval;
diff --git a/profiler/include/profiler/profile_gemm_impl.hpp b/profiler/include/profiler/profile_gemm_impl.hpp
index ccebb020c4..0ed225c934 100644
--- a/profiler/include/profiler/profile_gemm_impl.hpp
+++ b/profiler/include/profiler/profile_gemm_impl.hpp
@@ -75,8 +75,8 @@ int profile_gemm_impl(int do_verification,
         b_k_n.GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5});
         break;
     default:
-        a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0});
-        b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
+        a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 0.1});
+        b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.05, 0.05});
     }
 
     using AElementOp = ck::tensor_operation::element_wise::PassThrough;