diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp index a2940fa755..30e29cc8e2 100644 --- a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp @@ -611,11 +611,12 @@ struct DeviceGroupedGemmSoftmaxGemmPermute_Xdl_CShuffle some_has_main_k_block_loop |= y; } - hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_, - arg.group_kernel_args_.data(), - arg.group_kernel_args_.size() * sizeof(GroupKernelArg), - hipMemcpyHostToDevice, - stream_config.stream_id_)); + hipGetErrorString( + hipMemcpyWithStream(arg.p_workspace_, + arg.group_kernel_args_.data(), + arg.group_kernel_args_.size() * sizeof(GroupKernelArg), + hipMemcpyHostToDevice, + stream_config.stream_id_)); float ave_time = 0; diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp index efe3a69ac6..5775ff3971 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp @@ -655,7 +655,7 @@ struct DeviceGroupedContractionMultipleD_Xdl_CShuffle hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_, arg.contraction_multi_d_kernel_args_.data(), arg.contraction_multi_d_kernel_args_.size() * - sizeof(ContractionMultiDKernelArg), + sizeof(ContractionMultiDKernelArg), hipMemcpyHostToDevice, stream_config.stream_id_)); diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp index 2a3e2b6cf2..22be58259a 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp @@ -597,11 +597,12 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm