mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-24 14:54:47 +00:00
General 2D Reduction Kernel (#2535)
* General 2D Reduction Kernel
* Move the reduction kernel from the example
* Split the code and add the necessary policy, problem, shape files as
per ck_tile convention
* Add/modify the headers
* Modified the example to work with the 'new' kernel
* Added tests for the kernel
* N-D refernce reduce
* Added support for N-D input with transform to 2D
* Added padding to support various input sized tensors
* Bug fix in the thread buffer constructor
* Some comments to explain the reduce2d block kernel
* comments resolution
* clang-format
* comments resolution
* clang-format
* clang-format
* comments resolution
* clang-format
[ROCm/composable_kernel commit: 4750b293fe]
This commit is contained in:
committed by
GitHub
parent
b7659e284a
commit
b6f0e98da6
@@ -42,7 +42,11 @@ struct thread_buffer {
|
||||
|
||||
// TODO: this ctor can't ignore
|
||||
CK_TILE_HOST_DEVICE constexpr thread_buffer() : data{} {}
|
||||
CK_TILE_HOST_DEVICE constexpr thread_buffer(const value_type & o) : data{o} {}
|
||||
CK_TILE_HOST_DEVICE constexpr thread_buffer(const value_type & o) : data{} {
|
||||
static_for<0, N, 1>{}(
|
||||
[&](auto i) { data[i] = o; }
|
||||
);
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE static constexpr auto size() { return N; }
|
||||
CK_TILE_HOST_DEVICE auto & get() {return data; }
|
||||
|
||||
@@ -26,7 +26,8 @@ struct Add
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_same_v<T, half_t> || std::is_same_v<T, bf16_t>>>
|
||||
typename = std::enable_if_t<std::is_same_v<T, half_t> || std::is_same_v<T, bf16_t> ||
|
||||
std::is_same_v<T, fp8_t> || std::is_same_v<T, bf8_t>>>
|
||||
CK_TILE_HOST_DEVICE constexpr T operator()(T& y, T x) const
|
||||
{
|
||||
float y_ = type_convert<float>(y);
|
||||
@@ -34,6 +35,8 @@ struct Add
|
||||
|
||||
return type_convert<T>(y_ + x_);
|
||||
}
|
||||
|
||||
static constexpr bool requires_special_combine = false;
|
||||
};
|
||||
|
||||
struct SquareAdd
|
||||
@@ -51,13 +54,47 @@ struct SquareAdd
|
||||
{
|
||||
return y + (x * x);
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_same_v<T, half_t> || std::is_same_v<T, bf16_t> ||
|
||||
std::is_same_v<T, fp8_t> || std::is_same_v<T, bf8_t>>>
|
||||
CK_TILE_HOST_DEVICE constexpr T operator()(T& y, T x) const
|
||||
{
|
||||
float y_ = type_convert<float>(y);
|
||||
float x_ = type_convert<float>(x);
|
||||
return type_convert<T>(y_ + (x_ * x_));
|
||||
}
|
||||
|
||||
// For combining partial results
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double> ||
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>>>
|
||||
CK_TILE_HOST_DEVICE constexpr T combine_partial_results(const T& partial1,
|
||||
const T& partial2) const
|
||||
{
|
||||
return partial1 + partial2; // Just add the partial sums, don't square again
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_same_v<T, half_t> || std::is_same_v<T, bf16_t> ||
|
||||
std::is_same_v<T, fp8_t> || std::is_same_v<T, bf8_t>>>
|
||||
CK_TILE_HOST_DEVICE constexpr T combine_partial_results(T& partial1, T& partial2) const
|
||||
{
|
||||
float partial1_ = type_convert<float>(partial1);
|
||||
float partial2_ = type_convert<float>(partial2);
|
||||
return type_convert<T>(partial1_ + partial2_);
|
||||
}
|
||||
|
||||
static constexpr bool requires_special_combine = true;
|
||||
};
|
||||
|
||||
struct Max
|
||||
{
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double> ||
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>>>
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t> ||
|
||||
std::is_same_v<T, half_t> || std::is_same_v<T, bf16_t> ||
|
||||
std::is_same_v<T, fp8_t> || std::is_same_v<T, bf8_t>>>
|
||||
CK_TILE_HOST_DEVICE static constexpr T GetIdentityValue()
|
||||
{
|
||||
return numeric<T>::min();
|
||||
@@ -65,18 +102,24 @@ struct Max
|
||||
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double> ||
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>>>
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t> ||
|
||||
std::is_same_v<T, half_t> || std::is_same_v<T, bf16_t> ||
|
||||
std::is_same_v<T, fp8_t> || std::is_same_v<T, bf8_t>>>
|
||||
CK_TILE_HOST_DEVICE constexpr T operator()(const T& y, const T x) const
|
||||
{
|
||||
return max(y, x);
|
||||
}
|
||||
|
||||
static constexpr bool requires_special_combine = false;
|
||||
};
|
||||
|
||||
struct AbsMax
|
||||
{
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double> ||
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>>>
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t> ||
|
||||
std::is_same_v<T, half_t> || std::is_same_v<T, bf16_t> ||
|
||||
std::is_same_v<T, fp8_t> || std::is_same_v<T, bf8_t>>>
|
||||
CK_TILE_HOST_DEVICE static constexpr T GetIdentityValue()
|
||||
{
|
||||
return numeric<T>::min();
|
||||
@@ -84,11 +127,15 @@ struct AbsMax
|
||||
|
||||
template <typename T,
|
||||
typename = std::enable_if_t<std::is_same_v<T, float> || std::is_same_v<T, double> ||
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t>>>
|
||||
std::is_same_v<T, int32_t> || std::is_same_v<T, int8_t> ||
|
||||
std::is_same_v<T, half_t> || std::is_same_v<T, bf16_t> ||
|
||||
std::is_same_v<T, fp8_t> || std::is_same_v<T, bf8_t>>>
|
||||
CK_TILE_HOST_DEVICE constexpr T operator()(const T& y, const T x) const
|
||||
{
|
||||
return max(y, abs(x));
|
||||
}
|
||||
|
||||
static constexpr bool requires_special_combine = false;
|
||||
};
|
||||
|
||||
} // namespace ReduceOp
|
||||
|
||||
Reference in New Issue
Block a user