mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-29 19:28:33 +00:00
Fixed the int4 MoE TFLOPS/bandwidth calculation: the B0 term of num_btype now uses sizeof(B0DataType) / 2.
This commit is contained in:
@@ -458,7 +458,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
std::size_t flop = std::size_t(2) * tokens * topk * N * K;
|
||||
std::size_t num_btype =
|
||||
sizeof(A0DataType) * valid_tile_num * K + sizeof(B0DataType) * K * N * experts + sizeof(EDataType) * valid_tile_num * N;
|
||||
sizeof(A0DataType) * valid_tile_num * K + sizeof(B0DataType) / 2 * K * N * experts + sizeof(EDataType) * valid_tile_num * N;
|
||||
|
||||
float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
|
||||
|
||||
|
||||
@@ -422,7 +422,7 @@ int main(int argc, char* argv[])
|
||||
|
||||
std::size_t flop = std::size_t(2) * tokens * topk * N * K;
|
||||
std::size_t num_btype =
|
||||
sizeof(A0DataType) * tokens * K * topk + sizeof(B0DataType) * K * N * experts + sizeof(EDataType) * tokens * N;
|
||||
sizeof(A0DataType) * tokens * K * topk + sizeof(B0DataType) / 2 * K * N * experts + sizeof(EDataType) * tokens * N;
|
||||
|
||||
float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user