Add attention sink support for FMHA FWD (#3368)

* Revert "Revert "Add attn sink (#2892)" (#3250)"

This reverts commit 5adaa201ed.

* fix conflict

Signed-off-by: Linjun-AMD <Jun.Lin@amd.com>

* Add F_sink parameter to FmhaFwdPipeline

* Update tile_fmha_traits.hpp

* Refactor pipeline creation in fmha_fwd.py

Updated the pipeline creation logic to include the 'sink' parameter in the product combinations and adjusted the FmhaFwdPipeline calls accordingly.

* Update fmha_fwd.py

* Update fmha_fwd.py

* Update example/ck_tile/01_fmha/script/correct_test_fwd_sink.sh

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* update CHANGELOG.md

Signed-off-by: Linjun-AMD <Jun.Lin@amd.com>

* Update CHANGELOG with new features and support

* Update fmha_fwd.hpp

* Update CHANGELOG.md

* Update smoke_test_fwd_sink.sh

* Update correct_test_fwd_sink.sh

* Update smoke_test_fwd_sink.sh

---------

Signed-off-by: Linjun-AMD <Jun.Lin@amd.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Linjun-AMD
2025-12-15 12:21:59 +08:00
committed by GitHub
parent 22b945e06e
commit f5573f56d9
26 changed files with 948 additions and 196 deletions

View File

@@ -162,6 +162,17 @@ struct StandardAttention
{
return !params.impl_mask.IsOutOfBound(qo_idx, kv_idx);
}
// Predicate for the attention-sink region of the logits: returns true when the
// (qo_idx, kv_idx) position lies inside the sink bound (i.e. must be kept),
// false when params.impl_mask reports it as out of the sink bound.
// batch_idx and the head indices are accepted for interface uniformity but are
// not consulted here — presumably other mask variants use them; confirm at callers.
template <typename Params>
__device__ __forceinline__ bool LogitsSinkMask(const Params& params,
                                               [[maybe_unused]] uint32_t batch_idx,
                                               uint32_t qo_idx,
                                               uint32_t kv_idx,
                                               [[maybe_unused]] uint32_t qo_head_idx,
                                               [[maybe_unused]] uint32_t kv_head_idx) const
{
    const bool outside_sink = params.impl_mask.IsOutOfSinkBound(qo_idx, kv_idx);
    return !outside_sink;
}
};
template <bool UseExp2 = false>
@@ -224,6 +235,17 @@ struct LogitsSoftCap
{
return !params.impl_mask.IsOutOfBound(qo_idx, kv_idx);
}
// Attention-sink mask predicate: a logits element at (qo_idx, kv_idx) passes
// (returns true) unless params.impl_mask marks it as out of the sink bound.
// Inverts IsOutOfSinkBound so that "true" means "keep this element".
// The batch and head indices are part of the shared mask interface and are
// intentionally unused by this implementation.
template <typename Params>
__device__ __forceinline__ bool LogitsSinkMask(const Params& params,
                                               [[maybe_unused]] uint32_t batch_idx,
                                               uint32_t qo_idx,
                                               uint32_t kv_idx,
                                               [[maybe_unused]] uint32_t qo_head_idx,
                                               [[maybe_unused]] uint32_t kv_head_idx) const
{
    const bool outside_sink = params.impl_mask.IsOutOfSinkBound(qo_idx, kv_idx);
    return !outside_sink;
}
};
constexpr uint32_t CUSTOM_MASK = 1U;
@@ -297,6 +319,17 @@ struct ComposedAttention
{
return !params.impl_mask.IsOutOfBound(qo_idx, kv_idx);
}
// Sink-bound mask check for composed attention. Delegates entirely to
// params.impl_mask.IsOutOfSinkBound(qo_idx, kv_idx) and negates the result,
// so a "true" return means the (qo_idx, kv_idx) logit is inside the sink
// bound and should not be masked out. batch_idx / qo_head_idx / kv_head_idx
// exist only to match the sibling mask-method signatures and are unused.
template <typename Params>
__device__ __forceinline__ bool LogitsSinkMask(const Params& params,
                                               [[maybe_unused]] uint32_t batch_idx,
                                               uint32_t qo_idx,
                                               uint32_t kv_idx,
                                               [[maybe_unused]] uint32_t qo_head_idx,
                                               [[maybe_unused]] uint32_t kv_head_idx) const
{
    const bool outside_sink = params.impl_mask.IsOutOfSinkBound(qo_idx, kv_idx);
    return !outside_sink;
}
};
} // namespace ck_tile