mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 21:21:22 +00:00
replace hipMemcpy with hipMemcpyWithStream (#734)
This commit is contained in:
@@ -611,10 +611,11 @@ struct DeviceGroupedGemmSoftmaxGemmPermute_Xdl_CShuffle
 some_has_main_k_block_loop |= y;
 }
 
-hipGetErrorString(hipMemcpy(arg.p_workspace_,
-arg.group_kernel_args_.data(),
-arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
-hipMemcpyHostToDevice));
+hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
+arg.group_kernel_args_.data(),
+arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
+hipMemcpyHostToDevice,
+stream_config.stream_id_));
 
 float ave_time = 0;
 
@@ -652,11 +652,12 @@ struct DeviceGroupedContractionMultipleD_Xdl_CShuffle
 }
 }
 
-hipGetErrorString(hipMemcpy(arg.p_workspace_,
-arg.contraction_multi_d_kernel_args_.data(),
-arg.contraction_multi_d_kernel_args_.size() *
-sizeof(ContractionMultiDKernelArg),
-hipMemcpyHostToDevice));
+hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
+arg.contraction_multi_d_kernel_args_.data(),
+arg.contraction_multi_d_kernel_args_.size() *
+sizeof(ContractionMultiDKernelArg),
+hipMemcpyHostToDevice,
+stream_config.stream_id_));
 
 float ave_time = 0;
 
@@ -597,10 +597,11 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
 }
 }
 
-hipGetErrorString(hipMemcpy(arg.p_workspace_,
-arg.gemm_desc_kernel_arg_.data(),
-arg.gemm_desc_kernel_arg_.size() * sizeof(GemmKernelArg),
-hipMemcpyHostToDevice));
+hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
+arg.gemm_desc_kernel_arg_.data(),
+arg.gemm_desc_kernel_arg_.size() * sizeof(GemmKernelArg),
+hipMemcpyHostToDevice,
+stream_config.stream_id_));
 
 auto launch_kernel = [&](auto has_main_k_block_loop,
 auto has_double_tail_k_block_loop) {
@@ -549,10 +549,11 @@ struct DeviceGroupedGemm_Xdl : public DeviceGroupedGemm<ALayout,
 }
 
 hipGetErrorString(
-hipMemcpy(arg.p_workspace_,
-arg.gemm_desc_kernel_arg_.data(),
-arg.gemm_desc_kernel_arg_.size() * sizeof(GemmBiasTransKernelArg),
-hipMemcpyHostToDevice));
+hipMemcpyWithStream(arg.p_workspace_,
+arg.gemm_desc_kernel_arg_.data(),
+arg.gemm_desc_kernel_arg_.size() * sizeof(GemmBiasTransKernelArg),
+hipMemcpyHostToDevice,
+stream_config.stream_id_));
 
 float ave_time = 0;
 
@@ -406,10 +406,11 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
 }
 }
 
-hip_check_error(hipMemcpy(arg.p_workspace_,
-arg.gemm_kernel_args_.data(),
-arg.gemm_kernel_args_.size() * sizeof(GemmTransKernelArg),
-hipMemcpyHostToDevice));
+hip_check_error(hipMemcpyWithStream(arg.p_workspace_,
+arg.gemm_kernel_args_.data(),
+arg.gemm_kernel_args_.size() * sizeof(GemmTransKernelArg),
+hipMemcpyHostToDevice,
+stream_config.stream_id_));
 
 float ave_time = 0;
 
Reference in New Issue
Block a user