Merge commit 'f875ab0bbc6ea68a689a688a58f9a53ad12fd536' into develop

This commit is contained in:
assistant-librarian[bot]
2025-11-28 09:13:31 +00:00
parent 6032baee56
commit 296bf24afd
2 changed files with 10 additions and 2 deletions

View File

@@ -102,6 +102,9 @@ CK_TILE_DEVICE void atomic_add(X* p_dst, const X& x);
template <>
CK_TILE_DEVICE void atomic_add<bf16x2_t>(bf16x2_t* p_dst, const bf16x2_t& x)
{
#if HAS_GLOBAL_ATOMIC_PK_ADD_BUILTIN
__builtin_amdgcn_global_atomic_fadd_v2bf16(c_style_pointer_cast<bf16x2_t*>(p_dst), x);
#else
union U32BF162_ADDR
{
uint32_t* u32_a;
@@ -128,6 +131,7 @@ CK_TILE_DEVICE void atomic_add<bf16x2_t>(bf16x2_t* p_dst, const bf16x2_t& x)
new_v = new_.u32;
cur_v.u32 = atomicCAS(dword_addr.u32_a, old_v, new_v);
} while(cur_v.u32 != old_v);
#endif
}
template <>