mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
update s_barrier's logic in gfx12 architecture (#3003)
* change s_waitcnt's logic in gfx1250
* change s_waitcnt's logic in gfx1250
* update comment
This commit is contained in:
@@ -797,7 +797,7 @@ struct MoeSortingKernel
|
||||
else
|
||||
smem_tokens(curr_token_id, eid)++;
|
||||
}
|
||||
__builtin_amdgcn_s_waitcnt(0xc07f);
|
||||
s_waitcnt<waitcnt_arg::kMaxVmCnt, waitcnt_arg::kMaxExpCnt, 0>();
|
||||
}
|
||||
__syncthreads(); // make sure different i_token iteration not overlap by different wave
|
||||
}
|
||||
@@ -922,7 +922,7 @@ struct MoeSortingKernel
|
||||
// NOTE: this waitcnt is a must, compiler will not generate waitcnt lgkmcnt()
|
||||
// for above write however __syncthreads will cause barrier with waves other
|
||||
// than 0 (which is not what we want)
|
||||
__builtin_amdgcn_s_waitcnt(0xc07f);
|
||||
s_waitcnt<waitcnt_arg::kMaxVmCnt, waitcnt_arg::kMaxExpCnt, 0>();
|
||||
}
|
||||
if((lid + i_e_ - get_warp_size()) == (num_experts - 1))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user