Clip fp8 to +/-240 on all targets. (#1172)

* Clip fp8 to +/-240 on all targets.
* If inputs to the fp8 conversion are +/-inf, they remain unaltered.
* Increase the tolerance for test_elementwise_layernorm to prevent false errors.
* Change the input values for the gemm examples to floats.
* Reduce the gemm example float input values to prevent errors.
* Increase the tolerance for the gemm examples.

[ROCm/composable_kernel commit: d0c7b45150]
2026-05-16 10:59:55 +00:00 · 2024-02-27 12:31:05 -08:00
parent d2750747d4
commit f0f3b65e2a
4 changed files with 16 additions and 13 deletions
--- a/example/01_gemm/common.hpp
+++ b/example/01_gemm/common.hpp
@@ -49,7 +49,7 @@ struct ProblemSizeStreamK final
 struct ExecutionConfig final
 {
    bool do_verification = true;
-    int init_method      = 1;
+    int init_method      = 2;
    bool time_kernel     = false;
 };

--- a/example/01_gemm/run_gemm_example.inc
+++ b/example/01_gemm/run_gemm_example.inc
@@ -69,8 +69,8 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
        ck::utils::FillUniformDistributionIntegerValue<BDataType>{-5.f, 5.f}(b_k_n);
        break;
    default:
-        ck::utils::FillUniformDistribution<ADataType>{-1.f, 1.f}(a_m_k);
-        ck::utils::FillUniformDistribution<BDataType>{-1.f, 1.f}(b_k_n);
+        ck::utils::FillUniformDistribution<ADataType>{-0.1f, 0.1f}(a_m_k);
+        ck::utils::FillUniformDistribution<BDataType>{-0.1f, 0.1f}(b_k_n);
    }

    Tensor<CDataType> c_m_n_host_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
@@ -240,7 +240,8 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
 #else
        c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data());

-        return ck::utils::check_err(c_m_n_device_result, c_m_n_host_result);
+        return ck::utils::check_err(
+            c_m_n_device_result, c_m_n_host_result, "Error: Incorrect results!", 1e-1, 1e-1);
 #endif
    }