From 7848d15d398a5cb12aedbcc9a08da801aa0ef14f Mon Sep 17 00:00:00 2001
From: Qianfeng Zhang
Date: Thu, 24 Apr 2025 06:28:16 +0000
Subject: [PATCH] Using __builtin_amdgcn_rcpf in siLU function

---
Review notes (this region is not part of the commit):
 * The hunk below rewrites the body of the f_silu lambda: the division
   x / (neg_one - exp(x)) becomes a multiplication by
   __builtin_amdgcn_rcpf(neg_one - exp(x)).
 * The 'return' is dropped, so the lambda no longer yields a value and only
   updates x through its reference parameter. NOTE(review): confirm that no
   caller consumes the lambda's result.
 * NOTE(review): __builtin_amdgcn_rcpf is an AMDGPU compiler builtin on float;
   presumably CompDataType is float and the result may differ from an exact
   division in the low bits. Confirm the type and the accuracy tolerance.
 * NOTE(review): ck_tile::type_convert(-1.0f) appears without a template
   argument; an explicit '<...>' may have been lost when this patch was
   extracted. Verify against the upstream file.
 * NOTE(review): line breaks and leading indentation of the hunk lines were
   reconstructed from a whitespace-collapsed copy; the two blank context lines
   are inferred from the hunk line count (7). Verify before applying, or apply
   with whitespace-insensitive matching.

 .../ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp
index ea8d60ef6f..a0ce38c92d 100644
--- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp
@@ -247,7 +247,7 @@ struct HstuAttentionFwdPipelineQRKSVS
 
         const auto f_silu = [](CompDataType& x) {
             const auto neg_one = ck_tile::type_convert(-1.0f);
-            return x = x / (neg_one - exp(x));
+            x = x * __builtin_amdgcn_rcpf(neg_one - exp(x));
         };
 
         using OaccBlockTileType = decltype(gemm_1.MakeCBlockTile());