Slightly increase tolerance for batchnorm profiler

This commit is contained in:
Graner, Johannes
2026-01-13 07:06:52 -05:00
parent c9f0a5c15a
commit a3b2475229

View File

@@ -430,7 +430,17 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
{
out_device_buf.FromDevice(device_output.mData.data());
pass = pass & ck::utils::check_err(device_output, host_output);
if constexpr(std::is_same_v<OutDataType, ck::half_t>)
{
// The batchnorm involves operations which can cause small numerical
// differences between host and device implementations
static double rtol = 1e-2;
pass = pass & ck::utils::check_err(device_output, host_output, rtol);
}
else
{
pass = pass & ck::utils::check_err(device_output, host_output);
}
if(do_log)
{