diff --git a/example/09_convnd_fwd/convnd_fwd_dl_common.hpp b/example/09_convnd_fwd/convnd_fwd_dl_common.hpp index 9fe09a1f2e..496e1a04fb 100644 --- a/example/09_convnd_fwd/convnd_fwd_dl_common.hpp +++ b/example/09_convnd_fwd/convnd_fwd_dl_common.hpp @@ -10,6 +10,7 @@ #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" +#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/check_err.hpp" #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" @@ -88,7 +89,7 @@ bool run_grouped_conv_fwd_dl(bool do_verification, std::array input_left_pads{}; std::array input_right_pads{}; - auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; + auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); }; copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths); copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides); diff --git a/example/12_reduce/reduce_blockwise.cpp b/example/12_reduce/reduce_blockwise.cpp index fb9a6e6407..a7ee9990c1 100644 --- a/example/12_reduce/reduce_blockwise.cpp +++ b/example/12_reduce/reduce_blockwise.cpp @@ -142,7 +142,7 @@ bool reduce_blockwise_test(bool do_verification, std::array arrReduceDims; - std::copy(reduceDims.begin(), reduceDims.end(), arrReduceDims.begin()); + ck::ranges::copy(reduceDims, arrReduceDims.begin()); result = reduce_blockwise_impl arrOutLengths; std::array arrOutStrides; - std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin()); - std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin()); - std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin()); - std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin()); + ck::ranges::copy(inLengths, arrInLengths.begin()); + ck::ranges::copy(inStrides, arrInStrides.begin()); + ck::ranges::copy(outLengths, arrOutLengths.begin()); + ck::ranges::copy(outStrides, arrOutStrides.begin()); auto reduce = DeviceReduceInstance{}; diff --git a/example/12_reduce/reduce_blockwise_two_call.cpp b/example/12_reduce/reduce_blockwise_two_call.cpp index e668d31a17..39821f240a 100644 --- a/example/12_reduce/reduce_blockwise_two_call.cpp +++ b/example/12_reduce/reduce_blockwise_two_call.cpp @@ -221,12 +221,12 @@ int main(int argc, char* argv[]) std::array arrOutLengths; std::array arrOutStrides; - std::copy(inLengths_1.begin(), inLengths_1.end(), arrInLengths_1.begin()); - std::copy(inStrides_1.begin(), inStrides_1.end(), arrInStrides_1.begin()); - std::copy(inLengths_2.begin(), inLengths_2.end(), arrInLengths_2.begin()); - std::copy(inStrides_2.begin(), inStrides_2.end(), arrInStrides_2.begin()); - std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin()); - std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin()); + ck::ranges::copy(inLengths_1, arrInLengths_1.begin()); + ck::ranges::copy(inStrides_1, arrInStrides_1.begin()); + ck::ranges::copy(inLengths_2, arrInLengths_2.begin()); + ck::ranges::copy(inStrides_2, arrInStrides_2.begin()); + ck::ranges::copy(outLengths, arrOutLengths.begin()); + ck::ranges::copy(outStrides, arrOutStrides.begin()); auto reduce_1 = DeviceReduceInstance_1{}; diff --git a/example/12_reduce/reduce_multiblock_atomic_add.cpp b/example/12_reduce/reduce_multiblock_atomic_add.cpp index 90c04855b4..c4d63a3add 100644 --- a/example/12_reduce/reduce_multiblock_atomic_add.cpp +++ b/example/12_reduce/reduce_multiblock_atomic_add.cpp @@ -140,7 +140,7 @@ bool reduce_multiblock_atomic_add_test(bool do_verification, std::array a_reduceDims; - std::copy(reduceDims.begin(), reduceDims.end(), a_reduceDims.begin()); + ck::ranges::copy(reduceDims, a_reduceDims.begin()); result = reduce_multiblock_atomic_add_impl arrOutLengths; std::array arrOutStrides; - std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin()); - std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin()); - std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin()); - std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin()); + ck::ranges::copy(inLengths, arrInLengths.begin()); + ck::ranges::copy(inStrides, arrInStrides.begin()); + ck::ranges::copy(outLengths, arrOutLengths.begin()); + ck::ranges::copy(outStrides, arrOutStrides.begin()); auto reduce = DeviceReduceInstance{}; diff --git a/example/30_grouped_conv_fwd_multiple_d/common.hpp b/example/30_grouped_conv_fwd_multiple_d/common.hpp index 3995403607..d6d6dd6ff1 100644 --- a/example/30_grouped_conv_fwd_multiple_d/common.hpp +++ b/example/30_grouped_conv_fwd_multiple_d/common.hpp @@ -16,6 +16,7 @@ #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" +#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/check_err.hpp" #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" diff --git a/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc index 27ddcb6bec..d087c31af5 100644 --- a/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc +++ b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc @@ -116,7 +116,7 @@ bool run_grouped_conv_fwd(const ExecutionConfig& config, std::array input_left_pads{}; std::array input_right_pads{}; - auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; + auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); }; copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths); copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides); diff --git a/example/38_grouped_conv_bwd_data_multiple_d/common.hpp b/example/38_grouped_conv_bwd_data_multiple_d/common.hpp index 6c21d32700..d07ee7bdc1 100644 --- a/example/38_grouped_conv_bwd_data_multiple_d/common.hpp +++ b/example/38_grouped_conv_bwd_data_multiple_d/common.hpp @@ -15,6 +15,7 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp" +#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/check_err.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/utility/convolution_parameter.hpp" diff --git a/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc index 480c69b387..e50c98bbe8 100644 --- a/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc +++ b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc @@ -52,7 +52,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config, std::array input_left_pads{}; std::array input_right_pads{}; - auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; + auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); }; copy(out_g_n_k_wos_desc.GetLengths(), a_g_n_k_wos_lengths); copy(out_g_n_k_wos_desc.GetStrides(), a_g_n_k_wos_strides); diff --git a/example/44_conv2d_fwd_quant/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp b/example/44_conv2d_fwd_quant/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp index 613f607d8b..51315de7ed 100644 --- a/example/44_conv2d_fwd_quant/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp +++ b/example/44_conv2d_fwd_quant/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp @@ -6,6 +6,7 @@ #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" +#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/check_err.hpp" #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" @@ -144,7 +145,7 @@ bool run_grouped_conv_fwd(bool do_verification, std::array input_left_pads{}; std::array input_right_pads{}; - auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; + auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); }; copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths); copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides); diff --git a/example/44_conv2d_fwd_quant/conv2d_fwd_xdl_perlayer_quantization_int8.cpp b/example/44_conv2d_fwd_quant/conv2d_fwd_xdl_perlayer_quantization_int8.cpp index 71472440c9..fa7f34cef0 100644 --- a/example/44_conv2d_fwd_quant/conv2d_fwd_xdl_perlayer_quantization_int8.cpp +++ b/example/44_conv2d_fwd_quant/conv2d_fwd_xdl_perlayer_quantization_int8.cpp @@ -6,6 +6,7 @@ #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" +#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/check_err.hpp" #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" @@ -131,7 +132,7 @@ bool run_grouped_conv_fwd(bool do_verification, std::array input_left_pads{}; std::array input_right_pads{}; - auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; + auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); }; copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths); copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides); diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp index 0ae9d5fd82..0bbdbe52b9 100644 --- a/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp +++ b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp @@ -5,6 +5,7 @@ #include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp" #include "ck/tensor_operation/gpu/device/impl/device_elementwise.hpp" +#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/check_err.hpp" #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" @@ -69,7 +70,7 @@ int main() static_cast(nhwc[2] * nhwc[3]), static_cast(nhwc[3])}; - std::copy(nchw.begin(), nchw.end(), ab_lengths.begin()); + ck::ranges::copy(nchw, ab_lengths.begin()); auto broadcastPermute = DeviceElementwisePermuteInstance{}; auto argument = broadcastPermute.MakeArgumentPointer( diff --git a/profiler/include/profile_reduce_impl.hpp b/profiler/include/profile_reduce_impl.hpp index 354e6e46fa..ccb99398f2 100644 --- a/profiler/include/profile_reduce_impl.hpp +++ b/profiler/include/profile_reduce_impl.hpp @@ -6,8 +6,9 @@ #include "ck/utility/reduction_enums.hpp" #include "ck/tensor_operation/gpu/device/device_reduce.hpp" -#include "ck/library/utility/check_err.hpp" #include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp" +#include "ck/library/utility/algorithm.hpp" +#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_reduction.hpp" #include "ck/library/utility/host_common_util.hpp" @@ -359,10 +360,10 @@ bool profile_reduce_impl_impl(bool do_verification, std::array arrOutLengths; std::array arrOutStrides; - std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin()); - std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin()); - std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin()); - std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin()); + ck::ranges::copy(inLengths, arrInLengths.begin()); + ck::ranges::copy(inStrides, arrInStrides.begin()); + ck::ranges::copy(outLengths, arrOutLengths.begin()); + ck::ranges::copy(outStrides, arrOutStrides.begin()); for(auto& reduce_ptr : reduce_ptrs) { @@ -491,7 +492,7 @@ bool profile_reduce_impl(bool do_verification, std::array arrReduceDims; - std::copy(reduceDims.begin(), reduceDims.end(), arrReduceDims.begin()); + ck::ranges::copy(reduceDims, arrReduceDims.begin()); pass = pass && profile_reduce_impl_impl