diff --git a/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp b/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp
index 56d3b48547..12d28f572c 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp
@@ -253,12 +253,12 @@ struct DeviceMoeGemmBlockScale
                         // rotating mem
                         rotating_mem.Next();
                         // clear c mem
-                        // if(arg_.KBatch > 1)
-                        //     hipGetErrorString(hipMemsetAsync(arg_.p_c_grid,
-                        //                                      0,
-                        //                                      arg_.M * arg_.N * sizeof(CDataType)
-                        //                                      * (IsInputGemm && IsSplitK ? 2 : 1),
-                        //                                      stream_config.stream_id_));
+                        if(arg_.KBatch > 1)
+                            hipGetErrorString(hipMemsetAsync(arg_.p_c_grid,
+                                                             0,
+                                                             arg_.M * arg_.N * sizeof(CDataType) *
+                                                                 (IsInputGemm && IsSplitK ? 2 : 1),
+                                                             stream_config.stream_id_));
                     };
 
                     ave_time = ck::utility::launch_and_time_kernel_with_preprocess<false>(
@@ -272,12 +272,12 @@ struct DeviceMoeGemmBlockScale
                 }
                 else
                 {
-                    // if(arg.KBatch > 1)
-                    //     hipGetErrorString(hipMemsetAsync(arg.p_c_grid,
-                    //                                      0,
-                    //                                      arg.M * arg.N * sizeof(CDataType) *
-                    //                                      (IsInputGemm && IsSplitK ? 2 : 1),
-                    //                                      stream_config.stream_id_));
+                    if(arg.KBatch > 1)
+                        hipGetErrorString(hipMemsetAsync(arg.p_c_grid,
+                                                         0,
+                                                         arg.M * arg.N * sizeof(CDataType) *
+                                                             (IsInputGemm && IsSplitK ? 2 : 1),
+                                                         stream_config.stream_id_));
 
                     ave_time = launch_and_time_kernel(
                         stream_config, kernel, dim3(gdx, gdy, gdz), dim3(BlockSize), 0, arg);