diff --git a/example/65_gemm_multiply_multiply/hsa/moe_bs_stage2_v3_128x128x128.co b/example/65_gemm_multiply_multiply/hsa/moe_bs_stage2_v3_128x128x128.co index 128657224d..b402f82f33 100755 Binary files a/example/65_gemm_multiply_multiply/hsa/moe_bs_stage2_v3_128x128x128.co and b/example/65_gemm_multiply_multiply/hsa/moe_bs_stage2_v3_128x128x128.co differ diff --git a/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp b/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp index a8466a311b..143f1f85d2 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_moe_gemm_blockscale.hpp @@ -491,30 +491,30 @@ struct DeviceMoeGemmBlockScale RunKernel(kernel); } } - // else if constexpr(BlkGemmPipelineVer == BlockGemmPipelineVersion::v2 || - // BlkGemmPipelineVer == BlockGemmPipelineVersion::v3) - // { - // if(GridwiseGemm::CalculateKBlockLoopTailNum(K_split) == TailNumber::Odd) - // { - // const auto kernel = kernel_moe_gemm_2lds; - // RunKernel(kernel); - // } - // else - // { - // const auto kernel = kernel_moe_gemm_2lds; - // RunKernel(kernel); - // } - // } + else if constexpr(BlkGemmPipelineVer == BlockGemmPipelineVersion::v2 || + BlkGemmPipelineVer == BlockGemmPipelineVersion::v3) + { + if(GridwiseGemm::CalculateKBlockLoopTailNum(K_split) == TailNumber::Odd) + { + const auto kernel = kernel_moe_gemm_2lds; + RunKernel(kernel); + } + else + { + const auto kernel = kernel_moe_gemm_2lds; + RunKernel(kernel); + } + } } #endif #endif