Use int64_t for the expert stride to avoid 32-bit overflow

This commit is contained in:
Feng Shijie
2025-08-21 06:58:55 +00:00
parent 9fbcc8f8a4
commit 85976b0b87
3 changed files with 19 additions and 18 deletions

View File

@@ -644,7 +644,8 @@ struct MoeFlatmmKernel
});
const SplitKBatchOffset splitk_batch_offset(kargs);
-const index_t expert_stride = __builtin_amdgcn_readfirstlane(kargs.N * kargs.K);
+const long_index_t expert_stride =
+__builtin_amdgcn_readfirstlane(long_index_t(kargs.N) * kargs.K);
const ADataType* a_ptr =
static_cast<const ADataType*>(kargs.a_ptr) + splitk_batch_offset.a_k_split_offset;