Merge commit '9fcf21a4ec4698209c4ed7b859574cc1e1986aa3' into develop

This commit is contained in:
assistant-librarian[bot]
2025-06-10 07:12:08 +00:00
parent c99140ad8b
commit 49bcedcef1
3 changed files with 18 additions and 0 deletions

View File

@@ -223,6 +223,10 @@
#define CK_TILE_FMHA_FWD_FAST_EXP2 0
#endif
// Selects how the FMHA pipelines cast the P tile to PDataType.
//   non-zero    : always use the generic cast_tile<PDataType>() path.
//   0 (default) : when PDataType is fp16_t, use impl::cast_tile_pk_fp16_fp32, which is
//                 built on __builtin_amdgcn_cvt_pkrtz (round toward zero) and can
//                 therefore cost precision; other types use cast_tile<PDataType>().
// NOTE(review): "RTN" presumably stands for round-to-nearest -- confirm.
// The #ifndef guard lets an earlier definition (e.g. from the build) take precedence.
#ifndef CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN
#define CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN 0
#endif
// Defaults to 1 unless a definition already exists when this point is reached.
// NOTE(review): the code that consumes this switch is not visible here; the name
// suggests a workaround ("WA") for raw buffer loads of bf16 data -- confirm before
// changing the default.
#ifndef CK_TILE_BUFFER_LOAD_RAW_BF16_WA
#define CK_TILE_BUFFER_LOAD_RAW_BF16_WA 1
#endif

View File

@@ -702,12 +702,19 @@ struct BlockFmhaBatchPrefillPipelineQRKSVSAsync
}
// Build `p`: p_compute passed through p_compute_element_func, then cast to PDataType.
const auto p = [&]() {
#if CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN
    // The packed fp32 -> fp16 helper (impl::cast_tile_pk_fp16_fp32) is implemented with
    // __builtin_amdgcn_cvt_pkrtz, i.e. it rounds toward zero and loses precision.
    // With this switch enabled, every PDataType goes through the generic cast instead.
    return cast_tile<PDataType>(tile_elementwise_in(p_compute_element_func, p_compute));
#else
    if constexpr(!std::is_same_v<PDataType, fp16_t>)
    {
        // Any non-fp16 destination type: generic cast.
        return cast_tile<PDataType>(
            tile_elementwise_in(p_compute_element_func, p_compute));
    }
    else
    {
        // fp16 destination: packed conversion (round toward zero).
        return impl::cast_tile_pk_fp16_fp32<PDataType>(
            tile_elementwise_in(p_compute_element_func, p_compute));
    }
#endif
}();
// STAGE 3, KV gemm

View File

@@ -653,12 +653,19 @@ struct BlockFmhaPipelineQRKSVSAsync
}
// Build `p`: p_compute passed through p_compute_element_func, then cast to PDataType.
const auto p = [&]() {
#if CK_TILE_FMHA_FLOAT_TO_FLOAT16_RTN
    // The packed fp32 -> fp16 helper (impl::cast_tile_pk_fp16_fp32) is implemented with
    // __builtin_amdgcn_cvt_pkrtz, i.e. it rounds toward zero and loses precision.
    // With this switch enabled, every PDataType goes through the generic cast instead.
    return cast_tile<PDataType>(tile_elementwise_in(p_compute_element_func, p_compute));
#else
    if constexpr(!std::is_same_v<PDataType, fp16_t>)
    {
        // Any non-fp16 destination type: generic cast.
        return cast_tile<PDataType>(
            tile_elementwise_in(p_compute_element_func, p_compute));
    }
    else
    {
        // fp16 destination: packed conversion (round toward zero).
        return impl::cast_tile_pk_fp16_fp32<PDataType>(
            tile_elementwise_in(p_compute_element_func, p_compute));
    }
#endif
}();
// STAGE 3, KV gemm