diff --git a/example/ck_tile/18_flatmm/mixed_prec/run_a16w4_moe_flatmm_example.inc b/example/ck_tile/18_flatmm/mixed_prec/run_a16w4_moe_flatmm_example.inc index f236332d62..476dc70ecd 100644 --- a/example/ck_tile/18_flatmm/mixed_prec/run_a16w4_moe_flatmm_example.inc +++ b/example/ck_tile/18_flatmm/mixed_prec/run_a16w4_moe_flatmm_example.inc @@ -36,7 +36,7 @@ float invoke_a16w4_moe_gemm(int n_warmup, int n_repeat, const MoeHostArgs& args) std::size_t flop = std::size_t(2) * args.M * args.N * args.K; std::size_t num_byte = sizeof(ADataType) * args.M * args.K + - sizeof(BDataType) * args.N * args.K / PackedSize + + sizeof(BDataType) * args.N * args.K * std::min(args.experts, args.NumTokens * args.TopK) / PackedSize + sizeof(CDataType) * args.M * args.N; float tflops = static_cast(flop) / 1.E9 / ave_time; float gb_per_sec = num_byte / 1.E6 / ave_time;