mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-23 22:34:36 +00:00
Unify the naming of the math functions used by the host and kernel (#262)
* Use the unified naming for math functions on host and HIP kernel
* Corresponding change/simplification in reduction host/profiler/examples due to unified math functions renaming
* Renaming GetReductionZeroVal() to GetIdentityValue()
* Tiny renaming in profile_reduce_impl.hpp
* More renaming in profile_reduce_impl.hpp
* Replace zeroVal by identityVal
* Remove ck_ prefix in the naming of ck::math provided functions
[ROCm/composable_kernel commit: 86185bd7ce]
This commit is contained in:
@@ -1,257 +0,0 @@
|
||||
/*******************************************************************************
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2020 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*******************************************************************************/
|
||||
#ifndef GUARD_HOST_REDUCE_UTIL_HPP
|
||||
#define GUARD_HOST_REDUCE_UTIL_HPP
|
||||
|
||||
#include <limits>
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
|
||||
#include "reduction_enums.hpp"
|
||||
#include "data_type.hpp"
|
||||
#include "math_v2.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
namespace host_reduce {
|
||||
|
||||
using ck::NanPropagation;
|
||||
using ck::ReduceTensorOp;
|
||||
|
||||
template <typename AccDataType, ReduceTensorOp ReduceOpId>
|
||||
__host__ static inline std::function<void(AccDataType&)> PreUnaryOpFn(int)
|
||||
{
|
||||
using ck::math::abs;
|
||||
|
||||
if constexpr(ReduceOpId == ReduceTensorOp::NORM1)
|
||||
{
|
||||
return ([&](AccDataType& a_) { a_ = abs(a_); });
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::NORM2)
|
||||
{
|
||||
return ([&](AccDataType& a_) { a_ = a_ * a_; });
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::AMAX)
|
||||
{
|
||||
return ([&](AccDataType& a_) { a_ = abs(a_); });
|
||||
}
|
||||
else
|
||||
{
|
||||
// ReduceTensorOp::AVG:
|
||||
// ReduceTensorOp::ADD:
|
||||
// ReduceTensorOp::MUL:
|
||||
// ReduceTensorOp::MIN:
|
||||
// ReduceTensorOp::MAX:
|
||||
return ([&](AccDataType&) {});
|
||||
};
|
||||
};
|
||||
|
||||
template <typename AccDataType, ReduceTensorOp ReduceOpId>
|
||||
__host__ static inline std::function<void(AccDataType&)> PosUnaryOpFn(int32_t divider)
|
||||
{
|
||||
using std::sqrt;
|
||||
|
||||
if constexpr(ReduceOpId == ReduceTensorOp::NORM2)
|
||||
{
|
||||
return ([&](AccDataType& a_) { a_ = sqrt(a_); });
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::AVG)
|
||||
{
|
||||
return ([&, divider](AccDataType& a_) {
|
||||
a_ = a_ / static_cast<AccDataType>(static_cast<float>(divider));
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
// ReduceTensorOp::ADD:
|
||||
// ReduceTensorOp::NORM1:
|
||||
// ReduceTensorOp::MUL:
|
||||
// ReduceTensorOp::MIN:
|
||||
// ReduceTensorOp::MAX:
|
||||
// ReduceTensorOp::AMAX:
|
||||
return ([&](AccDataType&) {});
|
||||
}
|
||||
};
|
||||
|
||||
template <typename AccDataType, ReduceTensorOp ReduceOpId>
|
||||
__host__ static inline std::function<void(AccDataType&, AccDataType)> ReduceOpFn()
|
||||
{
|
||||
if constexpr(ReduceOpId == ReduceTensorOp::ADD || ReduceOpId == ReduceTensorOp::AVG ||
|
||||
ReduceOpId == ReduceTensorOp::NORM1 || ReduceOpId == ReduceTensorOp::NORM2)
|
||||
{
|
||||
return ([&](AccDataType& a_, AccDataType b_) { a_ = a_ + b_; });
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::MUL)
|
||||
{
|
||||
return ([&](AccDataType& a_, AccDataType b_) { a_ = a_ * b_; });
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::MIN)
|
||||
{
|
||||
return ([&](AccDataType& a_, AccDataType b_) {
|
||||
if(a_ > b_)
|
||||
a_ = b_;
|
||||
});
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::MAX || ReduceOpId == ReduceTensorOp::AMAX)
|
||||
{
|
||||
return ([&](AccDataType& a_, AccDataType b_) {
|
||||
if(a_ < b_)
|
||||
a_ = b_;
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
template <typename AccDataType, ReduceTensorOp ReduceOpId>
|
||||
__host__ static inline std::function<void(AccDataType&, AccDataType, bool& changed)> ReduceOpFn2()
|
||||
{
|
||||
if constexpr(ReduceOpId == ReduceTensorOp::MIN)
|
||||
{
|
||||
return ([&](AccDataType& a_, AccDataType b_, bool& changed) {
|
||||
if(a_ > b_)
|
||||
{
|
||||
a_ = b_;
|
||||
changed = true;
|
||||
}
|
||||
else
|
||||
changed = false;
|
||||
});
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::MAX || ReduceOpId == ReduceTensorOp::AMAX)
|
||||
{
|
||||
return ([&](AccDataType& a_, AccDataType b_, bool& changed) {
|
||||
if(a_ < b_)
|
||||
{
|
||||
a_ = b_;
|
||||
changed = true;
|
||||
}
|
||||
else
|
||||
changed = false;
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
// ReduceTensorOp::ADD:
|
||||
// ReduceTensorOp::MUL:
|
||||
// ReduceTensorOp::AVG:
|
||||
// ReduceTensorOp::NORM1:
|
||||
// ReduceTensorOp::NORM2:
|
||||
return (std::function<void(AccDataType&, AccDataType, bool&)>{});
|
||||
};
|
||||
};
|
||||
|
||||
template <typename AccDataType, ReduceTensorOp ReduceOpId>
|
||||
__host__ static inline AccDataType ReduceOpZeroVal()
|
||||
{
|
||||
if constexpr(ReduceOpId == ReduceTensorOp::MUL)
|
||||
{
|
||||
return (static_cast<AccDataType>(1.0f));
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::MIN)
|
||||
{
|
||||
return (ck::NumericLimits<AccDataType>::Max());
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::MAX)
|
||||
{
|
||||
return (ck::NumericLimits<AccDataType>::Lowest());
|
||||
}
|
||||
else if constexpr(ReduceOpId == ReduceTensorOp::AMAX)
|
||||
{
|
||||
return (static_cast<AccDataType>(0.0f));
|
||||
}
|
||||
else
|
||||
{
|
||||
// ReduceTensorOp::ADD
|
||||
// ReduceTensorOp::AVG
|
||||
// ReduceTensorOp::NORM1
|
||||
// ReduceTensorOp::NORM2
|
||||
return (static_cast<AccDataType>(0.0f));
|
||||
};
|
||||
};
|
||||
|
||||
template <typename AccDataType, bool PropagateNan>
|
||||
__host__ static inline void
|
||||
binop_with_nan_check(std::function<void(AccDataType&, AccDataType)> opReduce,
|
||||
AccDataType& accuVal,
|
||||
AccDataType currVal)
|
||||
{
|
||||
using ck::math::isnan;
|
||||
|
||||
if constexpr(!PropagateNan)
|
||||
{
|
||||
opReduce(accuVal, currVal);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(isnan(currVal))
|
||||
accuVal = currVal;
|
||||
else
|
||||
opReduce(accuVal, currVal);
|
||||
};
|
||||
};
|
||||
|
||||
template <typename AccDataType, typename IndexDataType, bool PropagateNan>
|
||||
__host__ static inline void
|
||||
binop_with_index_and_nan_check(std::function<void(AccDataType&, AccDataType, bool&)> opReduce,
|
||||
AccDataType& accuVal,
|
||||
AccDataType currVal,
|
||||
IndexDataType& accuIndex,
|
||||
IndexDataType currIndex)
|
||||
{
|
||||
using ck::math::isnan;
|
||||
|
||||
if constexpr(!PropagateNan)
|
||||
{
|
||||
bool changed;
|
||||
|
||||
opReduce(accuVal, currVal, changed);
|
||||
|
||||
if(changed)
|
||||
accuIndex = currIndex;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(isnan(currVal))
|
||||
{
|
||||
accuVal = currVal;
|
||||
accuIndex = currIndex;
|
||||
}
|
||||
else
|
||||
{
|
||||
bool changed;
|
||||
|
||||
opReduce(accuVal, currVal, changed);
|
||||
|
||||
if(changed)
|
||||
accuIndex = currIndex;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
}; // namespace host_reduce
|
||||
|
||||
}; // namespace ck
|
||||
|
||||
#endif
|
||||
@@ -33,10 +33,10 @@
|
||||
|
||||
#include "reduction_enums.hpp"
|
||||
#include "reduction_common.hpp"
|
||||
#include "host_reduce_util.hpp"
|
||||
#include "host_common_util.hpp"
|
||||
#include "host_tensor.hpp"
|
||||
#include "data_type.hpp"
|
||||
#include "reduction_functions_accumulate.hpp"
|
||||
|
||||
template <int NDim>
|
||||
static void get_all_indexes(const std::array<size_t, NDim>& dimLengths,
|
||||
@@ -106,11 +106,13 @@ static size_t get_offset_from_index(const std::vector<size_t>& strides,
|
||||
template <typename InDataType,
|
||||
typename AccDataType,
|
||||
typename OutDataType,
|
||||
ck::ReduceTensorOp ReduceOpId,
|
||||
typename ReduceOperation,
|
||||
typename InElementwiseOperation,
|
||||
typename AccElementwiseOperation,
|
||||
int Rank,
|
||||
int NumReduceDim,
|
||||
bool PropagateNan,
|
||||
bool NeedIndices>
|
||||
bool OutputIndex>
|
||||
struct ReductionHost
|
||||
{
|
||||
using IndexDataType = int32_t;
|
||||
@@ -122,8 +124,6 @@ struct ReductionHost
|
||||
std::vector<int> reduceDims;
|
||||
|
||||
IndexDataType divider;
|
||||
std::function<void(AccDataType&)> preUnaryOp;
|
||||
std::function<void(AccDataType&)> posUnaryOp;
|
||||
std::array<size_t, NumReduceDim> reduceLengths;
|
||||
std::array<size_t, NumReduceDim> reduceStrides;
|
||||
std::array<size_t, NumInvariantDim> invariantLengths;
|
||||
@@ -137,9 +137,6 @@ struct ReductionHost
|
||||
const std::vector<int>& invariantDims_,
|
||||
const std::vector<int>& reduceDims_)
|
||||
{
|
||||
using ck::host_reduce::PosUnaryOpFn;
|
||||
using ck::host_reduce::PreUnaryOpFn;
|
||||
|
||||
// this->outLengths = to_int_vector(outDesc.GetLengths());
|
||||
this->outStrides = outDesc.GetStrides();
|
||||
|
||||
@@ -171,9 +168,6 @@ struct ReductionHost
|
||||
invariant_dim_indexes.clear();
|
||||
get_all_indexes<NumInvariantDim>(invariantLengths, invariant_dim_indexes);
|
||||
};
|
||||
|
||||
preUnaryOp = PreUnaryOpFn<AccDataType, ReduceOpId>(divider);
|
||||
posUnaryOp = PosUnaryOpFn<AccDataType, ReduceOpId>(divider);
|
||||
};
|
||||
|
||||
void Run(float alpha,
|
||||
@@ -182,7 +176,7 @@ struct ReductionHost
|
||||
OutDataType* out_data,
|
||||
IndexDataType* out_indices)
|
||||
{
|
||||
if constexpr(NeedIndices)
|
||||
if constexpr(OutputIndex)
|
||||
{
|
||||
RunImpl_with_index(alpha, in_data, beta, out_data, out_indices);
|
||||
}
|
||||
@@ -201,15 +195,17 @@ struct ReductionHost
|
||||
using ck::float_equal_one;
|
||||
using ck::float_equal_zero;
|
||||
using ck::type_convert;
|
||||
using ck::host_reduce::binop_with_index_and_nan_check;
|
||||
using ck::host_reduce::ReduceOpFn2;
|
||||
using ck::host_reduce::ReduceOpZeroVal;
|
||||
|
||||
auto opReduce2 = ReduceOpFn2<AccDataType, ReduceOpId>();
|
||||
using Accumulation = ck::detail::AccumulateWithIndexAndNanCheck<PropagateNan,
|
||||
ReduceOperation,
|
||||
AccDataType,
|
||||
IndexDataType>;
|
||||
InElementwiseOperation in_elementwise_op(divider);
|
||||
AccElementwiseOperation acc_elementwise_op(divider);
|
||||
|
||||
if constexpr(NumInvariantDim == 0)
|
||||
{
|
||||
AccDataType accuVal = ReduceOpZeroVal<AccDataType, ReduceOpId>();
|
||||
AccDataType accuVal = ReduceOperation::GetIdentityValue();
|
||||
IndexDataType accuIndex = 0;
|
||||
|
||||
for(std::size_t i = 0; i < reduce_dim_indexes.size(); i++)
|
||||
@@ -219,15 +215,14 @@ struct ReductionHost
|
||||
|
||||
auto currVal = type_convert<AccDataType>(in_data[offset_reduce]);
|
||||
|
||||
preUnaryOp(currVal);
|
||||
in_elementwise_op(currVal, currVal);
|
||||
|
||||
auto currIndex = static_cast<IndexDataType>(i);
|
||||
|
||||
binop_with_index_and_nan_check<AccDataType, IndexDataType, PropagateNan>(
|
||||
opReduce2, accuVal, currVal, accuIndex, currIndex);
|
||||
Accumulation::Calculate(accuVal, currVal, accuIndex, currIndex);
|
||||
};
|
||||
|
||||
posUnaryOp(accuVal);
|
||||
acc_elementwise_op(accuVal, accuVal);
|
||||
|
||||
if(!float_equal_one{}(alpha))
|
||||
accuVal *= type_convert<AccDataType>(alpha);
|
||||
@@ -241,7 +236,7 @@ struct ReductionHost
|
||||
else
|
||||
{
|
||||
auto thread_reduce_func = [&](auto invariant_index) {
|
||||
AccDataType accuVal = ReduceOpZeroVal<AccDataType, ReduceOpId>();
|
||||
AccDataType accuVal = ReduceOperation::GetIdentityValue();
|
||||
IndexDataType accuIndex = 0;
|
||||
|
||||
auto offset_invariant =
|
||||
@@ -255,15 +250,14 @@ struct ReductionHost
|
||||
auto currVal =
|
||||
type_convert<AccDataType>(in_data[offset_invariant + offset_reduce]);
|
||||
|
||||
preUnaryOp(currVal);
|
||||
in_elementwise_op(currVal, currVal);
|
||||
|
||||
auto currIndex = static_cast<IndexDataType>(i);
|
||||
|
||||
binop_with_index_and_nan_check<AccDataType, IndexDataType, PropagateNan>(
|
||||
opReduce2, accuVal, currVal, accuIndex, currIndex);
|
||||
Accumulation::Calculate(accuVal, currVal, accuIndex, currIndex);
|
||||
};
|
||||
|
||||
posUnaryOp(accuVal);
|
||||
acc_elementwise_op(accuVal, accuVal);
|
||||
|
||||
if(!float_equal_one{}(alpha))
|
||||
accuVal *= type_convert<AccDataType>(alpha);
|
||||
@@ -308,15 +302,16 @@ struct ReductionHost
|
||||
using ck::float_equal_one;
|
||||
using ck::float_equal_zero;
|
||||
using ck::type_convert;
|
||||
using ck::host_reduce::binop_with_nan_check;
|
||||
using ck::host_reduce::ReduceOpFn;
|
||||
using ck::host_reduce::ReduceOpZeroVal;
|
||||
|
||||
auto opReduce = ReduceOpFn<AccDataType, ReduceOpId>();
|
||||
using Accumulation =
|
||||
ck::detail::AccumulateWithNanCheck<PropagateNan, ReduceOperation, AccDataType>;
|
||||
|
||||
InElementwiseOperation in_elementwise_op(divider);
|
||||
AccElementwiseOperation acc_elementwise_op(divider);
|
||||
|
||||
if constexpr(NumInvariantDim == 0)
|
||||
{
|
||||
AccDataType accuVal = ReduceOpZeroVal<AccDataType, ReduceOpId>();
|
||||
AccDataType accuVal = ReduceOperation::GetIdentityValue();
|
||||
|
||||
for(const auto& reduce_index : reduce_dim_indexes)
|
||||
{
|
||||
@@ -325,12 +320,12 @@ struct ReductionHost
|
||||
|
||||
auto currVal = type_convert<AccDataType>(in_data[offset_reduce]);
|
||||
|
||||
preUnaryOp(currVal);
|
||||
in_elementwise_op(currVal, currVal);
|
||||
|
||||
binop_with_nan_check<AccDataType, PropagateNan>(opReduce, accuVal, currVal);
|
||||
Accumulation::Calculate(accuVal, currVal);
|
||||
};
|
||||
|
||||
posUnaryOp(accuVal);
|
||||
acc_elementwise_op(accuVal, accuVal);
|
||||
|
||||
if(!float_equal_one{}(alpha))
|
||||
accuVal *= type_convert<AccDataType>(alpha);
|
||||
@@ -343,7 +338,7 @@ struct ReductionHost
|
||||
else
|
||||
{
|
||||
auto thread_reduce_func = [&](auto invariant_index) {
|
||||
AccDataType accuVal = ReduceOpZeroVal<AccDataType, ReduceOpId>();
|
||||
AccDataType accuVal = ReduceOperation::GetIdentityValue();
|
||||
|
||||
auto offset_invariant =
|
||||
get_offset_from_index<NumInvariantDim>(invariantStrides, invariant_index);
|
||||
@@ -356,12 +351,12 @@ struct ReductionHost
|
||||
auto currVal =
|
||||
type_convert<AccDataType>(in_data[offset_invariant + offset_reduce]);
|
||||
|
||||
preUnaryOp(currVal);
|
||||
in_elementwise_op(currVal, currVal);
|
||||
|
||||
binop_with_nan_check<AccDataType, PropagateNan>(opReduce, accuVal, currVal);
|
||||
Accumulation::Calculate(accuVal, currVal);
|
||||
};
|
||||
|
||||
posUnaryOp(accuVal);
|
||||
acc_elementwise_op(accuVal, accuVal);
|
||||
|
||||
if(!float_equal_one{}(alpha))
|
||||
accuVal *= type_convert<AccDataType>(alpha);
|
||||
|
||||
Reference in New Issue
Block a user