From eb588bc0838a8a914843cf2cea15a7787f1cc314 Mon Sep 17 00:00:00 2001 From: mtgu0705 Date: Tue, 20 May 2025 10:00:13 -0500 Subject: [PATCH] update the TFlops calculation in the example --- .../67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp index 62bd702c80..51d4c8785a 100644 --- a/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp +++ b/example/67_gemm_microscaling/moe_gemm2_xdl_mx_fp4_bns.cpp @@ -362,7 +362,7 @@ int main(int argc, char* argv[]) DeviceMem b1_device_buf(sizeof(XDataType) * b1_e_n_k.GetElementSpaceSize()); DeviceMem d2_device_buf(sizeof(D2DataType) * d2_e_n.GetElementSpaceSize()); DeviceMem e_device_buf(sizeof(EDataType) * e_t_n_device_result.GetElementSpaceSize()); - d2_e_n.savetxt("weight.txt", "int"); + // d2_e_n.savetxt("weight.txt", "int"); // A scale sorted for(int i = 0; i < sorted_size; i++) @@ -504,11 +504,10 @@ int main(int argc, char* argv[]) printf("d2_e_n:\n"); for(int i = 0; i < sorted_size; ++i) { - for(int n = 0; n < N; ++n) + for(int n = 0; n < 1; ++n) { printf("%.2f ", ck::type_convert(d2_e_n(i, n))); } - printf("\n"); } #endif @@ -559,7 +558,9 @@ int main(int argc, char* argv[]) // not result correct here because output buf not setzero float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel}); - std::size_t flop = std::size_t(2) * tokens * topk * N * K; + std::size_t flop = std::size_t(2) * tokens * topk * N * K + + std::size_t(2) * tokens * topk * N * K / ScaleBlockSize; + std::size_t num_btype = sizeof(A0DataType) / 2 * tokens * K * topk + sizeof(B0DataType) / 2 * K * N * experts + sizeof(EDataType) * tokens * N;