mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 01:10:17 +00:00
Add BF16 tests for batched_gemm_softmax_gemm_permute (#504)
* fixed bug in softmax reference & add bf16 examples for batched_gemm_scale_softmax_gemm
* added bf16 tests for batched_gemm_softmax_gemm_permute
* changed format of device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp
* changed format device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp
* aligned annotations
* modified CMakeLists for examples
* add common example code of fp16/bf16 version for batched_gemm_scale_softmax_gemm_xdl
* use macro to control the instances
* added macro control into instances
* clang-format some files
* changed error tolerance for bf16
* changed index for 10_elementwise_normalization
* fixed xdlops code bug in amd_xdlops.hpp

Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
This commit is contained in:
@@ -309,8 +309,25 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
|
||||
{
|
||||
c_device_buf.FromDevice(c_gs_ms_os_device_result.mData.data());
|
||||
|
||||
pass =
|
||||
pass & ck::utils::check_err(c_gs_ms_os_device_result, c_gs_ms_os_host_result);
|
||||
// default absolute error and relative error is 0.001
|
||||
double rtol = 1e-3;
|
||||
double atol = 1e-3;
|
||||
|
||||
// when BF16 is taken, set absolute error and relative error to 0.01
|
||||
if(std::is_same_v<ADataType, ck::bhalf_t> &&
|
||||
std::is_same_v<B0DataType, ck::bhalf_t> &&
|
||||
std::is_same_v<B1DataType, ck::bhalf_t> &&
|
||||
std::is_same_v<CDataType, ck::bhalf_t>)
|
||||
{
|
||||
rtol = 1e-2;
|
||||
atol = 1e-2;
|
||||
}
|
||||
|
||||
pass = pass & ck::utils::check_err(c_gs_ms_os_device_result,
|
||||
c_gs_ms_os_host_result,
|
||||
"Error: Incorrect results!",
|
||||
rtol,
|
||||
atol);
|
||||
|
||||
if(do_log)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user