Use __builtin_amdgcn_rcpf in the SiLU function

This commit is contained in:
Qianfeng Zhang
2025-04-24 06:28:16 +00:00
parent aec19176d4
commit 7848d15d39

View File

@@ -247,7 +247,7 @@ struct HstuAttentionFwdPipelineQRKSVS
// Element-wise activation helper: receives the value by reference and overwrites it in place.
// NOTE(review): this listing looks like a diff hunk whose +/- markers were stripped, so the
// pre-change statement (division) and the post-change statement (reciprocal multiply) both
// appear below. Read literally, the second statement is unreachable because it follows a return.
const auto f_silu = [](CompDataType& x) {
// -1 converted to the compute type; it is the constant term of the denominator below.
const auto neg_one = ck_tile::type_convert<CompDataType>(-1.0f);
// Presumably the removed line: computes x / (-1 - exp(x)), stores it back into x and returns it.
// NOTE(review): x / (-1 - exp(x)) equals SiLU(-x), so callers presumably pass a negated input -- confirm.
return x = x / (neg_one - exp(x));
// Presumably the added line: forms the same quotient as x times the reciprocal of the denominator,
// using the __builtin_amdgcn_rcpf builtin. It has no return, so with only this line kept the lambda
// would return void -- confirm that no caller uses the returned value.
// NOTE(review): the "f" suffix suggests a float operand and a hardware-approximate reciprocal;
// confirm that CompDataType is float and that the accuracy is acceptable.
x = x * __builtin_amdgcn_rcpf(neg_one - exp(x));
};
// Alias for whatever tile type gemm_1.MakeCBlockTile() returns.
// NOTE(review): the name suggests the output-accumulator tile of the second GEMM -- confirm.
using OaccBlockTileType = decltype(gemm_1.MakeCBlockTile());