mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
[CK_TILE][FMHA] Add sparse attention VSA (#3341)
* add sparse attention VSA * fix the pre-commit * Add jenga test and pre-commit * add bf16 for vsa * add jenga support bf16 * remove lse arg * split kernel code to block & kernel * fix the pre-commit * fix the pre-commit * fix the copyrights * fix the copyright * fix the copyright & rename block to pipeline * fix the copyright and pipeline * remove lse & dropout & add fmt * fix the jenga&VSA code review * remove the useless code & resolved the comments * remove useless code * remove useless code * Clean up code * Remove more unused code * Re-format .hpp * Refactor codegen scripts --------- Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com> Co-authored-by: asleepzzz <hanwen.chang@amd.com>
This commit is contained in:
@@ -44,6 +44,11 @@ __device__ inline int32_t amd_wave_read_first_lane(int32_t value)
|
||||
return __builtin_amdgcn_readfirstlane(value);
|
||||
}
|
||||
|
||||
__device__ inline uint32_t amd_wave_read_first_lane(uintptr_t value)
|
||||
{
|
||||
return __builtin_amdgcn_readfirstlane(static_cast<uint32_t>(value));
|
||||
}
|
||||
|
||||
template <typename Object, std::enable_if_t<std::is_trivially_copyable_v<Object>, int> = 0>
|
||||
__device__ inline auto amd_wave_read_first_lane(const Object& obj)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user