Rangify STL algorithms (#438)

* Rangify STL algorithms

This commit adapts rangified std::copy(), std::fill() & std::transform()

* Re-write more std::copy() calls

* Re-write std::copy() calls in profiler
This commit is contained in:
Po Yen Chen
2022-11-15 05:17:28 +08:00
committed by GitHub
parent b79bbbc22f
commit dc663fae29
14 changed files with 37 additions and 28 deletions

View File

@@ -142,7 +142,7 @@ bool reduce_blockwise_test(bool do_verification,
std::array<int, ShapeType::NumReduceDim_> arrReduceDims;
std::copy(reduceDims.begin(), reduceDims.end(), arrReduceDims.begin());
ck::ranges::copy(reduceDims, arrReduceDims.begin());
result = reduce_blockwise_impl<InOutDataType,
AccDataType,

View File

@@ -10,6 +10,7 @@
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
@@ -263,10 +264,10 @@ int reduce_blockwise_impl(bool do_verification,
std::array<index_t, NumOutDim> arrOutLengths;
std::array<index_t, NumOutDim> arrOutStrides;
std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin());
std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin());
std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin());
std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin());
ck::ranges::copy(inLengths, arrInLengths.begin());
ck::ranges::copy(inStrides, arrInStrides.begin());
ck::ranges::copy(outLengths, arrOutLengths.begin());
ck::ranges::copy(outStrides, arrOutStrides.begin());
auto reduce = DeviceReduceInstance{};

View File

@@ -221,12 +221,12 @@ int main(int argc, char* argv[])
std::array<index_t, 3> arrOutLengths;
std::array<index_t, 3> arrOutStrides;
std::copy(inLengths_1.begin(), inLengths_1.end(), arrInLengths_1.begin());
std::copy(inStrides_1.begin(), inStrides_1.end(), arrInStrides_1.begin());
std::copy(inLengths_2.begin(), inLengths_2.end(), arrInLengths_2.begin());
std::copy(inStrides_2.begin(), inStrides_2.end(), arrInStrides_2.begin());
std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin());
std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin());
ck::ranges::copy(inLengths_1, arrInLengths_1.begin());
ck::ranges::copy(inStrides_1, arrInStrides_1.begin());
ck::ranges::copy(inLengths_2, arrInLengths_2.begin());
ck::ranges::copy(inStrides_2, arrInStrides_2.begin());
ck::ranges::copy(outLengths, arrOutLengths.begin());
ck::ranges::copy(outStrides, arrOutStrides.begin());
auto reduce_1 = DeviceReduceInstance_1{};

View File

@@ -140,7 +140,7 @@ bool reduce_multiblock_atomic_add_test(bool do_verification,
std::array<int, ShapeType::NumReduceDim_> a_reduceDims;
std::copy(reduceDims.begin(), reduceDims.end(), a_reduceDims.begin());
ck::ranges::copy(reduceDims, a_reduceDims.begin());
result = reduce_multiblock_atomic_add_impl<InOutDataType,
AccDataType,

View File

@@ -10,6 +10,7 @@
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
@@ -176,10 +177,10 @@ int reduce_multiblock_atomic_add_impl(bool do_verification,
std::array<index_t, NumOutDim> arrOutLengths;
std::array<index_t, NumOutDim> arrOutStrides;
std::copy(inLengths.begin(), inLengths.end(), arrInLengths.begin());
std::copy(inStrides.begin(), inStrides.end(), arrInStrides.begin());
std::copy(outLengths.begin(), outLengths.end(), arrOutLengths.begin());
std::copy(outStrides.begin(), outStrides.end(), arrOutStrides.begin());
ck::ranges::copy(inLengths, arrInLengths.begin());
ck::ranges::copy(inStrides, arrInStrides.begin());
ck::ranges::copy(outLengths, arrOutLengths.begin());
ck::ranges::copy(outStrides, arrOutStrides.begin());
auto reduce = DeviceReduceInstance{};