mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
[CK_TILE]Moe update index (#1672)
* update MOCK_ID for moe-sorting * add moe-smoothquant * update a comment * fix format * hot fix * update topk in overflow case * update comments * update bf16 cvt --------- Co-authored-by: valarLip <340077269@qq.com>
This commit is contained in:
@@ -8,6 +8,9 @@
|
||||
|
||||
namespace ck_tile {
|
||||
|
||||
#define MOE_SORTING_MOCK_ID(token_id_, topk_id_) \
|
||||
static_cast<uint32_t>(((token_id_)&0x00ffffff) | (((topk_id_)&0xff) << 24))
|
||||
|
||||
template <typename WeightType, typename IndexType = index_t>
|
||||
CK_TILE_HOST void reference_moe_sorting(const HostTensor<IndexType>& topk_ids,
|
||||
const HostTensor<WeightType>& weights,
|
||||
@@ -20,8 +23,14 @@ CK_TILE_HOST void reference_moe_sorting(const HostTensor<IndexType>& topk_ids,
|
||||
{
|
||||
const index_t num_token = topk_ids.mDesc.get_lengths()[0];
|
||||
const index_t topk = topk_ids.mDesc.get_lengths()[1];
|
||||
std::vector<std::vector<IndexType>> expert_tokens(experts,
|
||||
std::vector<IndexType>(unit_size, num_token));
|
||||
// allocate a temp buffer, and fill the value with [number_token|topk]
|
||||
std::vector<std::vector<IndexType>> expert_tokens(
|
||||
experts,
|
||||
#if CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
|
||||
std::vector<IndexType>(unit_size, MOE_SORTING_MOCK_ID(num_token, topk)));
|
||||
#else
|
||||
std::vector<IndexType>(unit_size, num_token));
|
||||
#endif
|
||||
std::vector<std::vector<WeightType>> expert_token_weights(
|
||||
experts, std::vector<WeightType>(unit_size, 0));
|
||||
std::vector<IndexType> expert_slices(experts, 1);
|
||||
@@ -42,12 +51,19 @@ CK_TILE_HOST void reference_moe_sorting(const HostTensor<IndexType>& topk_ids,
|
||||
expert_token_weights[e].resize(new_size);
|
||||
for(index_t i = (expert_slices[e] - 1) * unit_size; i < new_size; i++)
|
||||
{
|
||||
expert_tokens[e][i] = num_token;
|
||||
#if CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
|
||||
expert_tokens[e][i] = MOE_SORTING_MOCK_ID(num_token, topk);
|
||||
#else
|
||||
expert_tokens[e][i] = num_token;
|
||||
#endif
|
||||
expert_token_weights[e][i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
expert_tokens[e][idx] = t;
|
||||
#if CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
|
||||
expert_tokens[e][idx] = MOE_SORTING_MOCK_ID(t, k);
|
||||
#else
|
||||
expert_tokens[e][idx] = t;
|
||||
#endif
|
||||
expert_token_weights[e][idx] = w;
|
||||
expert_slice_idxs[e]++;
|
||||
}
|
||||
@@ -75,4 +91,7 @@ CK_TILE_HOST void reference_moe_sorting(const HostTensor<IndexType>& topk_ids,
|
||||
unit_cnt *= unit_size;
|
||||
return;
|
||||
}
|
||||
|
||||
#undef MOE_SORTING_MOCK_ID
|
||||
|
||||
} // namespace ck_tile
|
||||
|
||||
Reference in New Issue
Block a user