fix the s_waitcnt_imm calculation

This commit is contained in:
aska-0096
2025-07-16 05:39:50 +00:00
parent ec0a45b29f
commit c30f8b709b
3 changed files with 5 additions and 2 deletions

View File

@@ -116,8 +116,10 @@ CK_TILE_DEVICE void block_sync_load_raw(index_t cnt = 0)
/// @brief Wait until at most `vmcnt` vector-memory operations are outstanding,
///        then execute a workgroup barrier.
///
/// Builds the 16-bit immediate for `s_waitcnt` so that only the vmcnt field is
/// constrained, issues the wait, and then issues `s_barrier`.
///
/// @tparam vmcnt Value encoded into the vmcnt field of the immediate; must be in [0, 63].
template <index_t vmcnt>
CK_TILE_DEVICE void block_sync_lds_direct_load()
{
    // vmcnt is encoded in 6 bits (4 low + 2 high), so 63 is the largest value
    // that can be expressed. Negative values are rejected as well because they
    // would set bits outside the 16-bit immediate.
    static_assert(0 <= vmcnt && vmcnt <= 63, "vmcnt must be in [0, 63]");

    // We don't sync the lds insts here: 3952 == 0xF70 sets immediate bits [6:4]
    // and [11:8] to all ones and leaves the vmcnt bits clear.
    // NOTE(review): this bit layout (vmcnt low nibble in [3:0], vmcnt high bits
    // in [15:14]) matches the gfx9-style s_waitcnt encoding - confirm for other targets.
    constexpr index_t non_vm_counters_unconstrained = 0xF70;

    // Low nibble of vmcnt stays in bits [3:0]; bits 4-5 of vmcnt move up by 10
    // to land in bits [15:14]. They must NOT be left in place (as a plain
    // `vmcnt & 0xff` would do), since bits [6:4] are already occupied above.
    constexpr index_t vmcnt_lo = vmcnt & 0xf;
    constexpr index_t vmcnt_hi = (vmcnt & 0x30) << 10;

    constexpr auto s_waitcnt_imm = non_vm_counters_unconstrained | vmcnt_hi | vmcnt_lo;

    __builtin_amdgcn_s_waitcnt(s_waitcnt_imm);
    __builtin_amdgcn_s_barrier();
}