From e66c3993ecadd06ab72bcc3132bc2b182cfb170a Mon Sep 17 00:00:00 2001 From: who who who Date: Fri, 2 Jun 2023 05:23:41 +0800 Subject: [PATCH] replace hipMemcpy with hipMemcpyWithStream (#734) [ROCm/composable_kernel commit: e2ebc8e79599531e2b837c27b8aaa0404c5c58ce] --- ...grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp | 9 +++++---- ...ce_grouped_contraction_multiple_d_xdl_cshuffle.hpp | 11 ++++++----- .../device/impl/device_grouped_gemm_multiple_d_dl.hpp | 9 +++++---- .../gpu/device/impl/device_grouped_gemm_xdl.hpp | 9 +++++---- .../impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp | 9 +++++---- 5 files changed, 26 insertions(+), 21 deletions(-) diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp index 9c8b5f462e..a2940fa755 100644 --- a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp @@ -611,10 +611,11 @@ struct DeviceGroupedGemmSoftmaxGemmPermute_Xdl_CShuffle some_has_main_k_block_loop |= y; } - hipGetErrorString(hipMemcpy(arg.p_workspace_, - arg.group_kernel_args_.data(), - arg.group_kernel_args_.size() * sizeof(GroupKernelArg), - hipMemcpyHostToDevice)); + hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_, + arg.group_kernel_args_.data(), + arg.group_kernel_args_.size() * sizeof(GroupKernelArg), + hipMemcpyHostToDevice, + stream_config.stream_id_)); float ave_time = 0; diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp index a275ee1023..efe3a69ac6 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp @@ -652,11 +652,12 @@ struct DeviceGroupedContractionMultipleD_Xdl_CShuffle } } - hipGetErrorString(hipMemcpy(arg.p_workspace_, - arg.contraction_multi_d_kernel_args_.data(), - arg.contraction_multi_d_kernel_args_.size() * - sizeof(ContractionMultiDKernelArg), - hipMemcpyHostToDevice)); + hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_, + arg.contraction_multi_d_kernel_args_.data(), + arg.contraction_multi_d_kernel_args_.size() * + sizeof(ContractionMultiDKernelArg), + hipMemcpyHostToDevice, + stream_config.stream_id_)); float ave_time = 0; diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp index d1f1b7fcc9..2a3e2b6cf2 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp @@ -597,10 +597,11 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm