mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
Huaiguxu/moe fp8 pertoken scale fix (#2391)
* Fix pertoken_scale a_scale dimension
* clang-format
* Fix moe_gemm2_fp8 perTokenScale reference and example.
This commit is contained in:
@@ -1473,7 +1473,12 @@ struct GridwiseMoeGemm
     index_t fused_token = scale_token_ids.AsType<index_t>()[m4];
     const index_t token_offset = fused_token & 0xffffff;
     return token_offset < problem.NumTokens
-               ? p_sorted_weights_0[token_offset]
+               ? p_sorted_weights_0[IsInputGemm
+                                        ? token_offset
+                                        : token_offset *
+                                                  problem.TopK +
+                                              (fused_token >>
+                                               24)]
                : 0.0;
 }
 else
@@ -2190,7 +2195,12 @@ struct GridwiseMoeGemm
     index_t fused_token = scale_token_ids.AsType<index_t>()[m4];
     const index_t token_offset = fused_token & 0xffffff;
     return token_offset < problem.NumTokens
-               ? p_sorted_weights_0[token_offset]
+               ? p_sorted_weights_0[IsInputGemm
+                                        ? token_offset
+                                        : token_offset *
+                                                  problem.TopK +
+                                              (fused_token >>
+                                               24)]
                : 0.0;
 }
 else
Reference in New Issue
Block a user