mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Rangify STL algorithms (#438)
* Rangify STL algorithms
This commit adopts the rangified versions of std::copy(), std::fill() & std::transform()
* Re-write more std::copy() calls
* Re-write std::copy() calls in profiler
[ROCm/composable_kernel commit: dc663fae29]
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
@@ -88,7 +89,7 @@ bool run_grouped_conv_fwd_dl(bool do_verification,
|
||||
std::array<ck::index_t, NDimSpatial> input_left_pads{};
|
||||
std::array<ck::index_t, NDimSpatial> input_right_pads{};
|
||||
|
||||
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };
|
||||
auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
|
||||
|
||||
copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
|
||||
copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
|
||||
|
||||
@@ -142,7 +142,7 @@ bool reduce_blockwise_test(bool do_verification,
|
||||
|
||||
std::array<int, ShapeType::NumReduceDim_> arrReduceDims;
|
||||
|
||||
std::copy(reduceDims.begin(), reduceDims.end(), arrReduceDims.begin());
|
||||
ck::ranges::copy(reduceDims, arrReduceDims.begin());
|
||||
|
||||
result = reduce_blockwise_impl<InOutDataType,
|
||||
AccDataType,
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp"
|
||||
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
@@ -263,10 +264,10 @@ int reduce_blockwise_impl(bool do_verification,
|
||||
std::array<index_t, NumOutDim> arrOutLengths;
|
||||
std::array<index_t, NumOutDim> arrOutStrides;
|
||||
|
||||
std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin());
|
||||
std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin());
|
||||
std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin());
|
||||
std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin());
|
||||
ck::ranges::copy(inLengths, arrInLengths.begin());
|
||||
ck::ranges::copy(inStrides, arrInStrides.begin());
|
||||
ck::ranges::copy(outLengths, arrOutLengths.begin());
|
||||
ck::ranges::copy(outStrides, arrOutStrides.begin());
|
||||
|
||||
auto reduce = DeviceReduceInstance{};
|
||||
|
||||
|
||||
@@ -221,12 +221,12 @@ int main(int argc, char* argv[])
|
||||
std::array<index_t, 3> arrOutLengths;
|
||||
std::array<index_t, 3> arrOutStrides;
|
||||
|
||||
std::copy(inLengths_1.begin(), inLengths_1.end(), arrInLengths_1.begin());
|
||||
std::copy(inStrides_1.begin(), inStrides_1.end(), arrInStrides_1.begin());
|
||||
std::copy(inLengths_2.begin(), inLengths_2.end(), arrInLengths_2.begin());
|
||||
std::copy(inStrides_2.begin(), inStrides_2.end(), arrInStrides_2.begin());
|
||||
std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin());
|
||||
std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin());
|
||||
ck::ranges::copy(inLengths_1, arrInLengths_1.begin());
|
||||
ck::ranges::copy(inStrides_1, arrInStrides_1.begin());
|
||||
ck::ranges::copy(inLengths_2, arrInLengths_2.begin());
|
||||
ck::ranges::copy(inStrides_2, arrInStrides_2.begin());
|
||||
ck::ranges::copy(outLengths, arrOutLengths.begin());
|
||||
ck::ranges::copy(outStrides, arrOutStrides.begin());
|
||||
|
||||
auto reduce_1 = DeviceReduceInstance_1{};
|
||||
|
||||
|
||||
@@ -140,7 +140,7 @@ bool reduce_multiblock_atomic_add_test(bool do_verification,
|
||||
|
||||
std::array<int, ShapeType::NumReduceDim_> a_reduceDims;
|
||||
|
||||
std::copy(reduceDims.begin(), reduceDims.end(), a_reduceDims.begin());
|
||||
ck::ranges::copy(reduceDims, a_reduceDims.begin());
|
||||
|
||||
result = reduce_multiblock_atomic_add_impl<InOutDataType,
|
||||
AccDataType,
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp"
|
||||
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
@@ -176,10 +177,10 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
|
||||
std::array<index_t, NumOutDim> arrOutLengths;
|
||||
std::array<index_t, NumOutDim> arrOutStrides;
|
||||
|
||||
std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin());
|
||||
std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin());
|
||||
std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin());
|
||||
std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin());
|
||||
ck::ranges::copy(inLengths, arrInLengths.begin());
|
||||
ck::ranges::copy(inStrides, arrInStrides.begin());
|
||||
ck::ranges::copy(outLengths, arrOutLengths.begin());
|
||||
ck::ranges::copy(outStrides, arrOutStrides.begin());
|
||||
|
||||
auto reduce = DeviceReduceInstance{};
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
|
||||
@@ -116,7 +116,7 @@ bool run_grouped_conv_fwd(const ExecutionConfig& config,
|
||||
std::array<ck::index_t, NDimSpatial> input_left_pads{};
|
||||
std::array<ck::index_t, NDimSpatial> input_right_pads{};
|
||||
|
||||
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };
|
||||
auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
|
||||
|
||||
copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
|
||||
copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp"
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
|
||||
#include "ck/library/utility/convolution_parameter.hpp"
|
||||
|
||||
@@ -52,7 +52,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
|
||||
std::array<ck::index_t, NDimSpatial> input_left_pads{};
|
||||
std::array<ck::index_t, NDimSpatial> input_right_pads{};
|
||||
|
||||
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };
|
||||
auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
|
||||
|
||||
copy(out_g_n_k_wos_desc.GetLengths(), a_g_n_k_wos_lengths);
|
||||
copy(out_g_n_k_wos_desc.GetStrides(), a_g_n_k_wos_strides);
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
@@ -144,7 +145,7 @@ bool run_grouped_conv_fwd(bool do_verification,
|
||||
std::array<ck::index_t, NDimSpatial> input_left_pads{};
|
||||
std::array<ck::index_t, NDimSpatial> input_right_pads{};
|
||||
|
||||
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };
|
||||
auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
|
||||
|
||||
copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
|
||||
copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
@@ -131,7 +132,7 @@ bool run_grouped_conv_fwd(bool do_verification,
|
||||
std::array<ck::index_t, NDimSpatial> input_left_pads{};
|
||||
std::array<ck::index_t, NDimSpatial> input_right_pads{};
|
||||
|
||||
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };
|
||||
auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
|
||||
|
||||
copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
|
||||
copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/impl/device_elementwise.hpp"
|
||||
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
@@ -69,7 +70,7 @@ int main()
|
||||
static_cast<int>(nhwc[2] * nhwc[3]),
|
||||
static_cast<int>(nhwc[3])};
|
||||
|
||||
std::copy(nchw.begin(), nchw.end(), ab_lengths.begin());
|
||||
ck::ranges::copy(nchw, ab_lengths.begin());
|
||||
|
||||
auto broadcastPermute = DeviceElementwisePermuteInstance{};
|
||||
auto argument = broadcastPermute.MakeArgumentPointer(
|
||||
|
||||
@@ -6,8 +6,9 @@
|
||||
#include "ck/utility/reduction_enums.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_reduce.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp"
|
||||
#include "ck/library/utility/algorithm.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_reduction.hpp"
|
||||
#include "ck/library/utility/host_common_util.hpp"
|
||||
@@ -359,10 +360,10 @@ bool profile_reduce_impl_impl(bool do_verification,
|
||||
std::array<index_t, NumOutDim> arrOutLengths;
|
||||
std::array<index_t, NumOutDim> arrOutStrides;
|
||||
|
||||
std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin());
|
||||
std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin());
|
||||
std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin());
|
||||
std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin());
|
||||
ck::ranges::copy(inLengths, arrInLengths.begin());
|
||||
ck::ranges::copy(inStrides, arrInStrides.begin());
|
||||
ck::ranges::copy(outLengths, arrOutLengths.begin());
|
||||
ck::ranges::copy(outStrides, arrOutStrides.begin());
|
||||
|
||||
for(auto& reduce_ptr : reduce_ptrs)
|
||||
{
|
||||
@@ -491,7 +492,7 @@ bool profile_reduce_impl(bool do_verification,
|
||||
|
||||
std::array<ck::index_t, descType::NumReduceDim_> arrReduceDims;
|
||||
|
||||
std::copy(reduceDims.begin(), reduceDims.end(), arrReduceDims.begin());
|
||||
ck::ranges::copy(reduceDims, arrReduceDims.begin());
|
||||
|
||||
pass = pass && profile_reduce_impl_impl<InDataType,
|
||||
AccDataType,
|
||||
|
||||
Reference in New Issue
Block a user