Unify the naming of the math functions used by the host and kernel (#262)

* Use the unified naming for math functions on host and HIP kernel * Corresponding change/simplification in reduction host/profiler/examples due to unified math functions renaming * Renaming GetReductionZeroVal() to GetIdentityValue() * Tiny renaming in profile_reduce_impl.hpp * More renaming in profile_reduce_impl.hpp * Replace zeroVal by identiyVal * Remove ck_ prefix in the naming of ck::math provided functions [ROCm/composable_kernel commit: 86185bd7ce]
2026-05-23 22:34:36 +00:00 · 2022-06-02 10:49:53 +08:00
parent 8d5fe58cd5
commit 3c29c7cac5
22 changed files with 198 additions and 417 deletions
--- a/library/include/ck/library/host_tensor/host_reduce_util.hpp
+++ b/library/include/ck/library/host_tensor/host_reduce_util.hpp
@@ -1,257 +0,0 @@
-/*******************************************************************************
- *
- * MIT License
- *
- * Copyright (c) 2020 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- *******************************************************************************/
-#ifndef GUARD_HOST_REDUCE_UTIL_HPP
-#define GUARD_HOST_REDUCE_UTIL_HPP
-
-#include <limits>
-#include <cmath>
-#include <functional>
-
-#include "reduction_enums.hpp"
-#include "data_type.hpp"
-#include "math_v2.hpp"
-
-namespace ck {
-
-namespace host_reduce {
-
-using ck::NanPropagation;
-using ck::ReduceTensorOp;
-
-template <typename AccDataType, ReduceTensorOp ReduceOpId>
-__host__ static inline std::function<void(AccDataType&)> PreUnaryOpFn(int)
-{
-    using ck::math::abs;
-
-    if constexpr(ReduceOpId == ReduceTensorOp::NORM1)
-    {
-        return ([&](AccDataType& a_) { a_ = abs(a_); });
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::NORM2)
-    {
-        return ([&](AccDataType& a_) { a_ = a_ * a_; });
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::AMAX)
-    {
-        return ([&](AccDataType& a_) { a_ = abs(a_); });
-    }
-    else
-    {
-        // ReduceTensorOp::AVG:
-        // ReduceTensorOp::ADD:
-        // ReduceTensorOp::MUL:
-        // ReduceTensorOp::MIN:
-        // ReduceTensorOp::MAX:
-        return ([&](AccDataType&) {});
-    };
-};
-
-template <typename AccDataType, ReduceTensorOp ReduceOpId>
-__host__ static inline std::function<void(AccDataType&)> PosUnaryOpFn(int32_t divider)
-{
-    using std::sqrt;
-
-    if constexpr(ReduceOpId == ReduceTensorOp::NORM2)
-    {
-        return ([&](AccDataType& a_) { a_ = sqrt(a_); });
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::AVG)
-    {
-        return ([&, divider](AccDataType& a_) {
-            a_ = a_ / static_cast<AccDataType>(static_cast<float>(divider));
-        });
-    }
-    else
-    {
-        // ReduceTensorOp::ADD:
-        // ReduceTensorOp::NORM1:
-        // ReduceTensorOp::MUL:
-        // ReduceTensorOp::MIN:
-        // ReduceTensorOp::MAX:
-        // ReduceTensorOp::AMAX:
-        return ([&](AccDataType&) {});
-    }
-};
-
-template <typename AccDataType, ReduceTensorOp ReduceOpId>
-__host__ static inline std::function<void(AccDataType&, AccDataType)> ReduceOpFn()
-{
-    if constexpr(ReduceOpId == ReduceTensorOp::ADD || ReduceOpId == ReduceTensorOp::AVG ||
-                 ReduceOpId == ReduceTensorOp::NORM1 || ReduceOpId == ReduceTensorOp::NORM2)
-    {
-        return ([&](AccDataType& a_, AccDataType b_) { a_ = a_ + b_; });
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::MUL)
-    {
-        return ([&](AccDataType& a_, AccDataType b_) { a_ = a_ * b_; });
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::MIN)
-    {
-        return ([&](AccDataType& a_, AccDataType b_) {
-            if(a_ > b_)
-                a_ = b_;
-        });
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::MAX || ReduceOpId == ReduceTensorOp::AMAX)
-    {
-        return ([&](AccDataType& a_, AccDataType b_) {
-            if(a_ < b_)
-                a_ = b_;
-        });
-    }
-};
-
-template <typename AccDataType, ReduceTensorOp ReduceOpId>
-__host__ static inline std::function<void(AccDataType&, AccDataType, bool& changed)> ReduceOpFn2()
-{
-    if constexpr(ReduceOpId == ReduceTensorOp::MIN)
-    {
-        return ([&](AccDataType& a_, AccDataType b_, bool& changed) {
-            if(a_ > b_)
-            {
-                a_      = b_;
-                changed = true;
-            }
-            else
-                changed = false;
-        });
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::MAX || ReduceOpId == ReduceTensorOp::AMAX)
-    {
-        return ([&](AccDataType& a_, AccDataType b_, bool& changed) {
-            if(a_ < b_)
-            {
-                a_      = b_;
-                changed = true;
-            }
-            else
-                changed = false;
-        });
-    }
-    else
-    {
-        // ReduceTensorOp::ADD:
-        // ReduceTensorOp::MUL:
-        // ReduceTensorOp::AVG:
-        // ReduceTensorOp::NORM1:
-        // ReduceTensorOp::NORM2:
-        return (std::function<void(AccDataType&, AccDataType, bool&)>{});
-    };
-};
-
-template <typename AccDataType, ReduceTensorOp ReduceOpId>
-__host__ static inline AccDataType ReduceOpZeroVal()
-{
-    if constexpr(ReduceOpId == ReduceTensorOp::MUL)
-    {
-        return (static_cast<AccDataType>(1.0f));
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::MIN)
-    {
-        return (ck::NumericLimits<AccDataType>::Max());
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::MAX)
-    {
-        return (ck::NumericLimits<AccDataType>::Lowest());
-    }
-    else if constexpr(ReduceOpId == ReduceTensorOp::AMAX)
-    {
-        return (static_cast<AccDataType>(0.0f));
-    }
-    else
-    {
-        // ReduceTensorOp::ADD
-        // ReduceTensorOp::AVG
-        // ReduceTensorOp::NORM1
-        // ReduceTensorOp::NORM2
-        return (static_cast<AccDataType>(0.0f));
-    };
-};
-
-template <typename AccDataType, bool PropagateNan>
-__host__ static inline void
-binop_with_nan_check(std::function<void(AccDataType&, AccDataType)> opReduce,
-                     AccDataType& accuVal,
-                     AccDataType currVal)
-{
-    using ck::math::isnan;
-
-    if constexpr(!PropagateNan)
-    {
-        opReduce(accuVal, currVal);
-    }
-    else
-    {
-        if(isnan(currVal))
-            accuVal = currVal;
-        else
-            opReduce(accuVal, currVal);
-    };
-};
-
-template <typename AccDataType, typename IndexDataType, bool PropagateNan>
-__host__ static inline void
-binop_with_index_and_nan_check(std::function<void(AccDataType&, AccDataType, bool&)> opReduce,
-                               AccDataType& accuVal,
-                               AccDataType currVal,
-                               IndexDataType& accuIndex,
-                               IndexDataType currIndex)
-{
-    using ck::math::isnan;
-
-    if constexpr(!PropagateNan)
-    {
-        bool changed;
-
-        opReduce(accuVal, currVal, changed);
-
-        if(changed)
-            accuIndex = currIndex;
-    }
-    else
-    {
-        if(isnan(currVal))
-        {
-            accuVal   = currVal;
-            accuIndex = currIndex;
-        }
-        else
-        {
-            bool changed;
-
-            opReduce(accuVal, currVal, changed);
-
-            if(changed)
-                accuIndex = currIndex;
-        };
-    };
-};
-
-}; // namespace host_reduce
-
-}; // namespace ck
-
-#endif
--- a/library/include/ck/library/host_tensor/host_reduction.hpp
+++ b/library/include/ck/library/host_tensor/host_reduction.hpp
@@ -33,10 +33,10 @@

 #include "reduction_enums.hpp"
 #include "reduction_common.hpp"
-#include "host_reduce_util.hpp"
 #include "host_common_util.hpp"
 #include "host_tensor.hpp"
 #include "data_type.hpp"
+#include "reduction_functions_accumulate.hpp"

 template <int NDim>
 static void get_all_indexes(const std::array<size_t, NDim>& dimLengths,
@@ -106,11 +106,13 @@ static size_t get_offset_from_index(const std::vector<size_t>& strides,
 template <typename InDataType,
          typename AccDataType,
          typename OutDataType,
-          ck::ReduceTensorOp ReduceOpId,
+          typename ReduceOperation,
+          typename InElementwiseOperation,
+          typename AccElementwiseOperation,
          int Rank,
          int NumReduceDim,
          bool PropagateNan,
-          bool NeedIndices>
+          bool OutputIndex>
 struct ReductionHost
 {
    using IndexDataType = int32_t;
@@ -122,8 +124,6 @@ struct ReductionHost
    std::vector<int> reduceDims;

    IndexDataType divider;
-    std::function<void(AccDataType&)> preUnaryOp;
-    std::function<void(AccDataType&)> posUnaryOp;
    std::array<size_t, NumReduceDim> reduceLengths;
    std::array<size_t, NumReduceDim> reduceStrides;
    std::array<size_t, NumInvariantDim> invariantLengths;
@@ -137,9 +137,6 @@ struct ReductionHost
                  const std::vector<int>& invariantDims_,
                  const std::vector<int>& reduceDims_)
    {
-        using ck::host_reduce::PosUnaryOpFn;
-        using ck::host_reduce::PreUnaryOpFn;
-
        // this->outLengths = to_int_vector(outDesc.GetLengths());
        this->outStrides = outDesc.GetStrides();

@@ -171,9 +168,6 @@ struct ReductionHost
            invariant_dim_indexes.clear();
            get_all_indexes<NumInvariantDim>(invariantLengths, invariant_dim_indexes);
        };
-
-        preUnaryOp = PreUnaryOpFn<AccDataType, ReduceOpId>(divider);
-        posUnaryOp = PosUnaryOpFn<AccDataType, ReduceOpId>(divider);
    };

    void Run(float alpha,
@@ -182,7 +176,7 @@ struct ReductionHost
             OutDataType* out_data,
             IndexDataType* out_indices)
    {
-        if constexpr(NeedIndices)
+        if constexpr(OutputIndex)
        {
            RunImpl_with_index(alpha, in_data, beta, out_data, out_indices);
        }
@@ -201,15 +195,17 @@ struct ReductionHost
        using ck::float_equal_one;
        using ck::float_equal_zero;
        using ck::type_convert;
-        using ck::host_reduce::binop_with_index_and_nan_check;
-        using ck::host_reduce::ReduceOpFn2;
-        using ck::host_reduce::ReduceOpZeroVal;

-        auto opReduce2 = ReduceOpFn2<AccDataType, ReduceOpId>();
+        using Accumulation = ck::detail::AccumulateWithIndexAndNanCheck<PropagateNan,
+                                                                        ReduceOperation,
+                                                                        AccDataType,
+                                                                        IndexDataType>;
+        InElementwiseOperation in_elementwise_op(divider);
+        AccElementwiseOperation acc_elementwise_op(divider);

        if constexpr(NumInvariantDim == 0)
        {
-            AccDataType accuVal     = ReduceOpZeroVal<AccDataType, ReduceOpId>();
+            AccDataType accuVal     = ReduceOperation::GetIdentityValue();
            IndexDataType accuIndex = 0;

            for(std::size_t i = 0; i < reduce_dim_indexes.size(); i++)
@@ -219,15 +215,14 @@ struct ReductionHost

                auto currVal = type_convert<AccDataType>(in_data[offset_reduce]);

-                preUnaryOp(currVal);
+                in_elementwise_op(currVal, currVal);

                auto currIndex = static_cast<IndexDataType>(i);

-                binop_with_index_and_nan_check<AccDataType, IndexDataType, PropagateNan>(
-                    opReduce2, accuVal, currVal, accuIndex, currIndex);
+                Accumulation::Calculate(accuVal, currVal, accuIndex, currIndex);
            };

-            posUnaryOp(accuVal);
+            acc_elementwise_op(accuVal, accuVal);

            if(!float_equal_one{}(alpha))
                accuVal *= type_convert<AccDataType>(alpha);
@@ -241,7 +236,7 @@ struct ReductionHost
        else
        {
            auto thread_reduce_func = [&](auto invariant_index) {
-                AccDataType accuVal     = ReduceOpZeroVal<AccDataType, ReduceOpId>();
+                AccDataType accuVal     = ReduceOperation::GetIdentityValue();
                IndexDataType accuIndex = 0;

                auto offset_invariant =
@@ -255,15 +250,14 @@ struct ReductionHost
                    auto currVal =
                        type_convert<AccDataType>(in_data[offset_invariant + offset_reduce]);

-                    preUnaryOp(currVal);
+                    in_elementwise_op(currVal, currVal);

                    auto currIndex = static_cast<IndexDataType>(i);

-                    binop_with_index_and_nan_check<AccDataType, IndexDataType, PropagateNan>(
-                        opReduce2, accuVal, currVal, accuIndex, currIndex);
+                    Accumulation::Calculate(accuVal, currVal, accuIndex, currIndex);
                };

-                posUnaryOp(accuVal);
+                acc_elementwise_op(accuVal, accuVal);

                if(!float_equal_one{}(alpha))
                    accuVal *= type_convert<AccDataType>(alpha);
@@ -308,15 +302,16 @@ struct ReductionHost
        using ck::float_equal_one;
        using ck::float_equal_zero;
        using ck::type_convert;
-        using ck::host_reduce::binop_with_nan_check;
-        using ck::host_reduce::ReduceOpFn;
-        using ck::host_reduce::ReduceOpZeroVal;

-        auto opReduce = ReduceOpFn<AccDataType, ReduceOpId>();
+        using Accumulation =
+            ck::detail::AccumulateWithNanCheck<PropagateNan, ReduceOperation, AccDataType>;
+
+        InElementwiseOperation in_elementwise_op(divider);
+        AccElementwiseOperation acc_elementwise_op(divider);

        if constexpr(NumInvariantDim == 0)
        {
-            AccDataType accuVal = ReduceOpZeroVal<AccDataType, ReduceOpId>();
+            AccDataType accuVal = ReduceOperation::GetIdentityValue();

            for(const auto& reduce_index : reduce_dim_indexes)
            {
@@ -325,12 +320,12 @@ struct ReductionHost

                auto currVal = type_convert<AccDataType>(in_data[offset_reduce]);

-                preUnaryOp(currVal);
+                in_elementwise_op(currVal, currVal);

-                binop_with_nan_check<AccDataType, PropagateNan>(opReduce, accuVal, currVal);
+                Accumulation::Calculate(accuVal, currVal);
            };

-            posUnaryOp(accuVal);
+            acc_elementwise_op(accuVal, accuVal);

            if(!float_equal_one{}(alpha))
                accuVal *= type_convert<AccDataType>(alpha);
@@ -343,7 +338,7 @@ struct ReductionHost
        else
        {
            auto thread_reduce_func = [&](auto invariant_index) {
-                AccDataType accuVal = ReduceOpZeroVal<AccDataType, ReduceOpId>();
+                AccDataType accuVal = ReduceOperation::GetIdentityValue();

                auto offset_invariant =
                    get_offset_from_index<NumInvariantDim>(invariantStrides, invariant_index);
@@ -356,12 +351,12 @@ struct ReductionHost
                    auto currVal =
                        type_convert<AccDataType>(in_data[offset_invariant + offset_reduce]);

-                    preUnaryOp(currVal);
+                    in_elementwise_op(currVal, currVal);

-                    binop_with_nan_check<AccDataType, PropagateNan>(opReduce, accuVal, currVal);
+                    Accumulation::Calculate(accuVal, currVal);
                };

-                posUnaryOp(accuVal);
+                acc_elementwise_op(accuVal, accuVal);

                if(!float_equal_one{}(alpha))
                    accuVal *= type_convert<AccDataType>(alpha);