diff --git a/client_example/04_contraction/contraction_bilinear.cpp b/client_example/04_contraction/contraction_bilinear.cpp index b71c51c026..91dead41a4 100644 --- a/client_example/04_contraction/contraction_bilinear.cpp +++ b/client_example/04_contraction/contraction_bilinear.cpp @@ -12,6 +12,7 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp" +#include "ck/library/utility/numeric.hpp" using F32 = float; @@ -192,20 +193,14 @@ int main(int argc, char* argv[]) { float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, true}); - ck::index_t M = std::accumulate(e_ms_ns_lengths.begin(), - e_ms_ns_lengths.begin() + NumDimM, - ck::index_t{1}, - std::multiplies{}); + ck::index_t M = ck::accumulate_n( + e_ms_ns_lengths.begin(), NumDimM, 1, std::multiplies<>{}); - ck::index_t N = std::accumulate(e_ms_ns_lengths.begin() + NumDimM, - e_ms_ns_lengths.begin() + NumDimM + NumDimN, - ck::index_t{1}, - std::multiplies{}); + ck::index_t N = ck::accumulate_n( + e_ms_ns_lengths.begin() + NumDimM, NumDimN, 1, std::multiplies<>{}); - ck::index_t K = std::accumulate(a_ms_ks_lengths.begin() + NumDimM, - a_ms_ks_lengths.begin() + NumDimM + NumDimK, - ck::index_t{1}, - std::multiplies{}); + ck::index_t K = ck::accumulate_n( + a_ms_ks_lengths.begin() + NumDimM, NumDimK, 1, std::multiplies<>{}); std::size_t flop = std::size_t(2) * M * N * K; std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * N + diff --git a/client_example/04_contraction/contraction_scale.cpp b/client_example/04_contraction/contraction_scale.cpp index 5908c1d86e..4e08ee19cd 100644 --- a/client_example/04_contraction/contraction_scale.cpp +++ b/client_example/04_contraction/contraction_scale.cpp @@ -12,6 +12,7 @@ #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/library/tensor_operation_instance/gpu/contraction_scale.hpp" +#include 
"ck/library/utility/numeric.hpp" using F32 = float; @@ -178,20 +179,14 @@ int main(int argc, char* argv[]) { float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, true}); - ck::index_t M = std::accumulate(e_ms_ns_lengths.begin(), - e_ms_ns_lengths.begin() + NumDimM, - ck::index_t{1}, - std::multiplies{}); + ck::index_t M = ck::accumulate_n( + e_ms_ns_lengths.begin(), NumDimM, 1, std::multiplies<>{}); - ck::index_t N = std::accumulate(e_ms_ns_lengths.begin() + NumDimM, - e_ms_ns_lengths.begin() + NumDimM + NumDimN, - ck::index_t{1}, - std::multiplies{}); + ck::index_t N = ck::accumulate_n( + e_ms_ns_lengths.begin() + NumDimM, NumDimN, 1, std::multiplies<>{}); - ck::index_t K = std::accumulate(a_ms_ks_lengths.begin() + NumDimM, - a_ms_ks_lengths.begin() + NumDimM + NumDimK, - ck::index_t{1}, - std::multiplies{}); + ck::index_t K = ck::accumulate_n( + a_ms_ks_lengths.begin() + NumDimM, NumDimK, 1, std::multiplies<>{}); std::size_t flop = std::size_t(2) * M * N * K; std::size_t num_btype = diff --git a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp index 02eba871c7..c934d35019 100644 --- a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp +++ b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp @@ -15,6 +15,7 @@ #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#include "ck/library/utility/numeric.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp" @@ -317,20 +318,14 @@ int main(int argc, char* argv[]) float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel}); - std::size_t M = std::accumulate(e_gs_ms_ns_lengths.begin() + NumDimG, - e_gs_ms_ns_lengths.begin() + NumDimG + NumDimM, - ck::index_t{1}, - std::multiplies{}); + std::size_t M = ck::accumulate_n( + 
e_gs_ms_ns_lengths.begin() + NumDimG, NumDimM, 1, std::multiplies<>{}); - std::size_t N = std::accumulate(e_gs_ms_ns_lengths.begin() + NumDimG + NumDimM, - e_gs_ms_ns_lengths.begin() + NumDimG + NumDimM + NumDimN, - ck::index_t{1}, - std::multiplies{}); + std::size_t N = ck::accumulate_n( + e_gs_ms_ns_lengths.begin() + NumDimG + NumDimM, NumDimN, 1, std::multiplies<>{}); - std::size_t K = std::accumulate(a_gs_ms_ks_lengths.begin() + NumDimG + NumDimM, - a_gs_ms_ks_lengths.begin() + NumDimG + NumDimM + NumDimK, - ck::index_t{1}, - std::multiplies{}); + std::size_t K = ck::accumulate_n( + a_gs_ms_ks_lengths.begin() + NumDimG + NumDimM, NumDimK, 1, std::multiplies<>{}); std::size_t flop = std::size_t(2) * M * N * K; std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * N + diff --git a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp index 0becfbd758..98835f98fa 100644 --- a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp +++ b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp @@ -15,6 +15,7 @@ #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#include "ck/library/utility/numeric.hpp" template using S = ck::Sequence; @@ -317,20 +318,14 @@ int main(int argc, char* argv[]) float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel}); - ck::index_t M = std::accumulate(e_gs_ms_ns_lengths.begin(), - e_gs_ms_ns_lengths.begin() + NumDimM, - ck::index_t{1}, - std::multiplies{}); + ck::index_t M = + ck::accumulate_n(e_gs_ms_ns_lengths.begin(), NumDimM, 1, std::multiplies<>{}); - ck::index_t N = std::accumulate(e_gs_ms_ns_lengths.begin() + NumDimM, - e_gs_ms_ns_lengths.begin() + NumDimM + NumDimN, - ck::index_t{1}, - std::multiplies{}); + ck::index_t N = ck::accumulate_n( + 
e_gs_ms_ns_lengths.begin() + NumDimM, NumDimN, 1, std::multiplies<>{}); - ck::index_t K = std::accumulate(a_gs_ms_ks_lengths.begin() + NumDimM, - a_gs_ms_ks_lengths.begin() + NumDimM + NumDimK, - ck::index_t{1}, - std::multiplies{}); + ck::index_t K = ck::accumulate_n( + a_gs_ms_ks_lengths.begin() + NumDimM, NumDimK, 1, std::multiplies<>{}); std::size_t flop = std::size_t(2) * M * N * K; std::size_t num_btype = sizeof(ADataType) * M * K + sizeof(BDataType) * K * N + diff --git a/example/26_contraction/contraction_bilinear_xdl_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32.cpp index e9e7ce02f3..ea105e4ff2 100644 --- a/example/26_contraction/contraction_bilinear_xdl_fp32.cpp +++ b/example/26_contraction/contraction_bilinear_xdl_fp32.cpp @@ -15,6 +15,7 @@ #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#include "ck/library/utility/numeric.hpp" template using S = ck::Sequence; @@ -358,20 +359,14 @@ int main(int argc, char* argv[]) float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel}); - ck::index_t M = std::accumulate(e_ms_ns_lengths.begin(), - e_ms_ns_lengths.begin() + NumDimM, - ck::index_t{1}, - std::multiplies{}); + ck::index_t M = + ck::accumulate_n(e_ms_ns_lengths.begin(), NumDimM, 1, std::multiplies<>{}); - ck::index_t N = std::accumulate(e_ms_ns_lengths.begin() + NumDimM, - e_ms_ns_lengths.begin() + NumDimM + NumDimN, - ck::index_t{1}, - std::multiplies{}); + ck::index_t N = ck::accumulate_n( + e_ms_ns_lengths.begin() + NumDimM, NumDimN, 1, std::multiplies<>{}); - ck::index_t K = std::accumulate(a_ms_ks_lengths.begin() + NumDimM, - a_ms_ks_lengths.begin() + NumDimM + NumDimK, - ck::index_t{1}, - std::multiplies{}); + ck::index_t K = ck::accumulate_n( + a_ms_ks_lengths.begin() + NumDimM, NumDimK, 1, std::multiplies<>{}); std::size_t flop = std::size_t(2) * M * N * K; std::size_t num_btype = sizeof(ADataType) * M * K + 
sizeof(BDataType) * K * N + diff --git a/example/26_contraction/contraction_scale_xdl_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp32.cpp index 4447030905..26f176b059 100644 --- a/example/26_contraction/contraction_scale_xdl_fp32.cpp +++ b/example/26_contraction/contraction_scale_xdl_fp32.cpp @@ -15,6 +15,7 @@ #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#include "ck/library/utility/numeric.hpp" template using S = ck::Sequence; @@ -341,20 +342,14 @@ int main(int argc, char* argv[]) float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel}); - ck::index_t M = std::accumulate(e_ms_ns_lengths.begin(), - e_ms_ns_lengths.begin() + NumDimM, - ck::index_t{1}, - std::multiplies{}); + ck::index_t M = + ck::accumulate_n(e_ms_ns_lengths.begin(), NumDimM, 1, std::multiplies<>{}); - ck::index_t N = std::accumulate(e_ms_ns_lengths.begin() + NumDimM, - e_ms_ns_lengths.begin() + NumDimM + NumDimN, - ck::index_t{1}, - std::multiplies{}); + ck::index_t N = ck::accumulate_n( + e_ms_ns_lengths.begin() + NumDimM, NumDimN, 1, std::multiplies<>{}); - ck::index_t K = std::accumulate(a_ms_ks_lengths.begin() + NumDimM, - a_ms_ks_lengths.begin() + NumDimM + NumDimK, - ck::index_t{1}, - std::multiplies{}); + ck::index_t K = ck::accumulate_n( + a_ms_ks_lengths.begin() + NumDimM, NumDimK, 1, std::multiplies<>{}); std::size_t flop = std::size_t(2) * M * N * K; std::size_t num_btype = diff --git a/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp index 32a714824c..f8e6501ead 100644 --- a/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp +++ b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp @@ -16,6 +16,7 @@ #include "ck/library/utility/device_memory.hpp" #include 
"ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#include "ck/library/utility/numeric.hpp" template using S = ck::Sequence; @@ -302,20 +303,14 @@ int main(int argc, char* argv[]) Tensor d_ms_ns(d_ms_ns_lengths, d_ms_ns_strides); Tensor e_ms_ns_device_result(e_ms_ns_lengths, e_ms_ns_strides); - ck::index_t M_ = std::accumulate(e_ms_ns_lengths.begin(), - e_ms_ns_lengths.begin() + NumDimM, - ck::index_t{1}, - std::multiplies{}); + ck::index_t M_ = + ck::accumulate_n(e_ms_ns_lengths.begin(), NumDimM, 1, std::multiplies<>{}); - ck::index_t N_ = std::accumulate(e_ms_ns_lengths.begin() + NumDimM, - e_ms_ns_lengths.begin() + NumDimM + NumDimN, - ck::index_t{1}, - std::multiplies{}); + ck::index_t N_ = ck::accumulate_n( + e_ms_ns_lengths.begin() + NumDimM, NumDimN, 1, std::multiplies<>{}); - ck::index_t K_ = std::accumulate(a_ms_ks_lengths.begin() + NumDimM, - a_ms_ks_lengths.begin() + NumDimM + NumDimK, - ck::index_t{1}, - std::multiplies{}); + ck::index_t K_ = ck::accumulate_n( + a_ms_ks_lengths.begin() + NumDimM, NumDimK, 1, std::multiplies<>{}); a_tensors.push_back(a_ms_ks); b_tensors.push_back(b_ns_ks); diff --git a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp index b94fe8fd25..25d815b9cd 100644 --- a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp +++ b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp @@ -15,6 +15,7 @@ #include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor_generator.hpp" +#include "ck/library/utility/numeric.hpp" template using S = ck::Sequence; @@ -317,25 +318,17 @@ int main(int argc, char* argv[]) float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel}); - ck::index_t G = std::accumulate(e_gs_ms_ns_lengths.begin(), - 
e_gs_ms_ns_lengths.begin() + NumDimG, - ck::index_t{1}, - std::multiplies{}); + ck::index_t G = + ck::accumulate_n(e_gs_ms_ns_lengths.begin(), NumDimG, 1, std::multiplies<>{}); - ck::index_t M = std::accumulate(e_gs_ms_ns_lengths.begin() + NumDimG, - e_gs_ms_ns_lengths.begin() + NumDimG + NumDimM, - ck::index_t{1}, - std::multiplies{}); + ck::index_t M = ck::accumulate_n( + e_gs_ms_ns_lengths.begin() + NumDimG, NumDimM, 1, std::multiplies<>{}); - ck::index_t N = std::accumulate(e_gs_ms_ns_lengths.begin() + NumDimG + NumDimM, - e_gs_ms_ns_lengths.begin() + NumDimG + NumDimM + NumDimN, - ck::index_t{1}, - std::multiplies{}); + ck::index_t N = ck::accumulate_n( + e_gs_ms_ns_lengths.begin() + NumDimG + NumDimM, NumDimN, 1, std::multiplies<>{}); - ck::index_t K = std::accumulate(a_gs_ms_ks_lengths.begin() + NumDimG + NumDimM, - a_gs_ms_ks_lengths.begin() + NumDimG + NumDimM + NumDimK, - ck::index_t{1}, - std::multiplies{}); + ck::index_t K = ck::accumulate_n( + a_gs_ms_ks_lengths.begin() + NumDimG + NumDimM, NumDimK, 1, std::multiplies<>{}); std::size_t flop = std::size_t(2) * G * M * N * K; std::size_t num_btype = sizeof(ADataType) * G * M * K + sizeof(BDataType) * G * K * N + diff --git a/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc b/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc index 104397928d..a2c97f4d42 100644 --- a/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc +++ b/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc @@ -120,18 +120,14 @@ bool run_grouped_conv_conv_fwd(bool do_verification, const ck::index_t gemm_batch = a0_g_n_c_wis_lengths[0]; const ck::index_t gemm0_m_length = - e1_g_n_k_wos_lengths[1] * std::accumulate(e1_g_n_k_wos_lengths.begin() + 3, - e1_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - ck::index_t{1}, - std::multiplies{}); + e1_g_n_k_wos_lengths[1] * + ck::accumulate_n( + e1_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>{}); 
const ck::index_t gemm0_n_length = b0_g_k_c_xs_lengths[1]; - const ck::index_t gemm0_k_length = - std::accumulate(b0_g_k_c_xs_lengths.begin() + 2, - b0_g_k_c_xs_lengths.begin() + 2 + NDimSpatial + 1, - ck::index_t{1}, - std::multiplies{}); + const ck::index_t gemm0_k_length = ck::accumulate_n( + b0_g_k_c_xs_lengths.begin() + 2, NDimSpatial + 1, 1, std::multiplies<>{}); const ck::index_t gemm1_n_length = b1_g_k_c_xs_lengths[1]; diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp index beb3da6992..8b54ee493c 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp @@ -22,6 +22,7 @@ #include "ck/host_utility/device_prop.hpp" #include "ck/host_utility/kernel_launch.hpp" #include "ck/host_utility/io.hpp" +#include "ck/library/utility/numeric.hpp" namespace ck { namespace tensor_operation { @@ -410,10 +411,9 @@ struct DeviceGroupedConvFwdMultipleDMultipleR_Xdl_CShuffle { const index_t N = r_g_n_wos_lengths[1]; - const index_t NHoWo = N * std::accumulate(r_g_n_wos_lengths.begin() + 2, - r_g_n_wos_lengths.begin() + 2 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NHoWo = + N * ck::accumulate_n( + r_g_n_wos_lengths.begin() + 2, NDimSpatial, 1, std::multiplies<>()); const auto r_grid_desc_mraw = make_naive_tensor_descriptor_packed(make_tuple(NHoWo)); @@ -435,10 +435,9 @@ struct DeviceGroupedConvFwdMultipleDMultipleR_Xdl_CShuffle const index_t WoStride = r_g_n_wos_strides[NDimSpatial + 2]; - const index_t NHoWo = N * std::accumulate(r_g_n_wos_lengths.begin() + 2, - r_g_n_wos_lengths.begin() + 2 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NHoWo = + N * ck::accumulate_n( + 
r_g_n_wos_lengths.begin() + 2, NDimSpatial, 1, std::multiplies<>()); const auto r_grid_desc_mraw = make_naive_tensor_descriptor(make_tuple(NHoWo), make_tuple(WoStride)); diff --git a/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp index 80934f7803..1b5e64b66c 100644 --- a/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp +++ b/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp @@ -4,6 +4,7 @@ #pragma once +#include "ck/library/utility/numeric.hpp" #include "ck/utility/common_header.hpp" #include "ck/tensor_description/tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp" @@ -47,10 +48,9 @@ struct TransformConvFwdToGemm if constexpr(ConvForwardSpecialization == device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0) { - const index_t NWo = N * std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NWo = + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); const auto in_gemmm_gemmk_desc = make_naive_tensor_descriptor_packed(make_tuple(NWo, C)); @@ -146,10 +146,9 @@ struct TransformConvFwdToGemm if constexpr(ConvForwardSpecialization == device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0) { - const index_t NHoWo = N * std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NHoWo = + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); const auto in_gemmm_gemmk_desc = make_naive_tensor_descriptor_packed(make_tuple(NHoWo, C)); @@ -262,10 +261,8 @@ struct TransformConvFwdToGemm device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0) { const index_t NDoHoWo = - N * 
std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); const auto in_gemmm_gemmk_desc = make_naive_tensor_descriptor_packed(make_tuple(NDoHoWo, C)); @@ -390,10 +387,9 @@ struct TransformConvFwdToGemm if constexpr(ConvForwardSpecialization == device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0) { - const index_t NHoWo = N * std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NHoWo = + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); // This is different const index_t WiStride = a_g_n_c_wis_strides[2 + NDimSpatial]; @@ -506,10 +502,9 @@ struct TransformConvFwdToGemm if constexpr(ConvForwardSpecialization == device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0) { - const index_t NHoWo = N * std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NHoWo = + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); // This is different const index_t WiStride = a_g_n_c_wis_strides[2 + NDimSpatial]; @@ -639,10 +634,8 @@ struct TransformConvFwdToGemm device::ConvolutionForwardSpecialization::Filter1x1Stride1Pad0) { const index_t NDoHoWo = - N * std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); // This is different const index_t WiStride = a_g_n_c_wis_strides[2 + NDimSpatial]; @@ -768,10 +761,8 @@ struct TransformConvFwdToGemm const index_t K = b_g_k_c_xs_lengths[1]; const index_t C = b_g_k_c_xs_lengths[2]; - const index_t YX = 
std::accumulate(b_g_k_c_xs_lengths.begin() + 3, - b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t YX = ck::accumulate_n( + b_g_k_c_xs_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); const auto wei_gemmn_gemmk_desc = make_naive_tensor_descriptor_packed(make_tuple(K, YX * C)); @@ -794,10 +785,8 @@ struct TransformConvFwdToGemm const index_t K = b_g_k_c_xs_lengths[1]; const index_t C = b_g_k_c_xs_lengths[2]; - const index_t YX = std::accumulate(b_g_k_c_xs_lengths.begin() + 3, - b_g_k_c_xs_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t YX = ck::accumulate_n( + b_g_k_c_xs_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); const index_t KStride = b_g_k_c_xs_strides[1]; const index_t XStride = b_g_k_c_xs_strides[2 + NDimSpatial]; @@ -827,10 +816,9 @@ struct TransformConvFwdToGemm const index_t N = c_g_n_k_wos_lengths[1]; const index_t K = c_g_n_k_wos_lengths[2]; - const index_t NHoWo = N * std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NHoWo = + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); const auto out_gemmm_gemmn_desc = make_naive_tensor_descriptor_packed(make_tuple(NHoWo, K)); @@ -855,10 +843,9 @@ struct TransformConvFwdToGemm const auto KStride = I1; const index_t WoStride = c_g_n_k_wos_strides[NDimSpatial + 2]; - const index_t NHoWo = N * std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NHoWo = + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); const auto out_gemmm_gemmn_desc = make_naive_tensor_descriptor(make_tuple(NHoWo, K), make_tuple(WoStride, KStride)); @@ -878,10 +865,9 @@ struct TransformConvFwdToGemm const index_t N = c_g_n_k_wos_lengths[1]; const index_t 
K = c_g_n_k_wos_lengths[2]; - const index_t NHoWo = N * std::accumulate(c_g_n_k_wos_lengths.begin() + 3, - c_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, - index_t{1}, - std::multiplies()); + const index_t NHoWo = + N * ck::accumulate_n( + c_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>()); const auto out_gemmm_gemmn_desc = make_naive_tensor_descriptor(make_tuple(NHoWo, K), make_tuple(I0, I1)); diff --git a/library/include/ck/library/utility/convolution_parameter.hpp b/library/include/ck/library/utility/convolution_parameter.hpp index 1c80e392fd..f4a2b56f75 100644 --- a/library/include/ck/library/utility/convolution_parameter.hpp +++ b/library/include/ck/library/utility/convolution_parameter.hpp @@ -10,6 +10,8 @@ #include "ck/ck.hpp" +#include "ck/library/utility/numeric.hpp" + namespace ck { namespace utils { namespace conv { @@ -55,10 +57,8 @@ struct ConvParam // sizeof(InDataType) * (G * N * C * ) + return sizeof(InDataType) * (G_ * N_ * C_ * - std::accumulate(std::begin(input_spatial_lengths_), - std::begin(input_spatial_lengths_) + num_dim_spatial_, - static_cast(1), - std::multiplies())); + ck::accumulate_n( + std::begin(input_spatial_lengths_), num_dim_spatial_, 1, std::multiplies<>())); } template @@ -67,10 +67,8 @@ struct ConvParam // sizeof(WeiDataType) * (G * K * C * ) + return sizeof(WeiDataType) * (G_ * K_ * C_ * - std::accumulate(std::begin(filter_spatial_lengths_), - std::begin(filter_spatial_lengths_) + num_dim_spatial_, - static_cast(1), - std::multiplies())); + ck::accumulate_n( + std::begin(filter_spatial_lengths_), num_dim_spatial_, 1, std::multiplies<>())); } template diff --git a/library/include/ck/library/utility/numeric.hpp b/library/include/ck/library/utility/numeric.hpp new file mode 100644 index 0000000000..70a7e87ab1 --- /dev/null +++ b/library/include/ck/library/utility/numeric.hpp @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. 
+
+#pragma once
+
+#include <iterator>
+#include <numeric>
+
+namespace ck {
+template <typename ForwardIterator, typename Size, typename T, typename BinaryOperation>
+auto accumulate_n(ForwardIterator first, Size count, T init, BinaryOperation op)
+    -> decltype(std::accumulate(first, std::next(first, count), init, op))
+{
+    return std::accumulate(first, std::next(first, count), init, op);
+}
+} // namespace ck
diff --git a/library/src/utility/convolution_parameter.cpp b/library/src/utility/convolution_parameter.cpp
index 82bb09e60c..c8712d2093 100644
--- a/library/src/utility/convolution_parameter.cpp
+++ b/library/src/utility/convolution_parameter.cpp
@@ -72,14 +72,10 @@ std::size_t ConvParam::GetFlops() const
 {
     // 2 * G * N * K * C * <output spatial lengths product> * <filter spatial lengths product>
     return static_cast<std::size_t>(2) * G_ * N_ * K_ * C_ *
-           std::accumulate(std::begin(output_spatial_lengths_),
-                           std::begin(output_spatial_lengths_) + num_dim_spatial_,
-                           static_cast<std::size_t>(1),
-                           std::multiplies<std::size_t>()) *
-           std::accumulate(std::begin(filter_spatial_lengths_),
-                           std::begin(filter_spatial_lengths_) + num_dim_spatial_,
-                           static_cast<std::size_t>(1),
-                           std::multiplies<std::size_t>());
+           ck::accumulate_n(
+               std::begin(output_spatial_lengths_), num_dim_spatial_, 1, std::multiplies<>()) *
+           ck::accumulate_n(
+               std::begin(filter_spatial_lengths_), num_dim_spatial_, 1, std::multiplies<>());
 }
 
 std::string get_conv_param_parser_helper_msg()