mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 01:10:17 +00:00
* Squashed 'src/composable_kernel/' content from commit f6edda611 git-subtree-dir: src/composable_kernel git-subtree-split: f6edda6119 * add solver ConvIgemmFwdV6r1DlopsNchwKcyxNkhw; rename static ck source files * Squashed 'src/composable_kernel/' changes from f6edda611..5781adf5c 5781adf5c Update develop (#5) (#6) 97e6d514f Merge pull request #4 from ROCmSoftwarePlatform/separate_online_compile 7b1ec41e5 refactor 49c33aaea refactor 54b3e73d1 rename git-subtree-dir: src/composable_kernel git-subtree-split: 5781adf5cf * fix * refactor * remove online compilation from CK * refactor * fix * add ctest * add c-style pointer cast * vector/scalar pointer cast use c-style pointer cast instead of reinterpret_cast * fix clang warning suppression * tidy * suppress cppcheck * fix enum issue * revert changes to hip build * fix kernel filename * update CK build script * rename * rename * make inner product compatible on gfx900 * Update src/include/miopen/solver/ck_utility_common.hpp Co-authored-by: JD <Jehandad.Khan@amd.com> * compiler parameter use stream * use int instead of index_t in kernel wrapper * DynamicBuffer, StaticBuffer, amd_buffer_load support customized value for invalid element * refactor * refactor * change cmakelist * change ck common utility * fix Co-authored-by: JD <Jehandad.Khan@amd.com>
63 lines
1.8 KiB
C++
63 lines
1.8 KiB
C++
#ifndef CK_FUNCTIONAL4_HPP
|
|
#define CK_FUNCTIONAL4_HPP
|
|
|
|
#include "sequence.hpp"
|
|
#include "tuple.hpp"
|
|
#include "array.hpp"
|
|
|
|
namespace ck {
|
|
|
|
namespace detail {
|
|
|
|
template <typename Indices>
|
|
struct unpack_impl;
|
|
|
|
template <index_t... Is>
|
|
struct unpack_impl<Sequence<Is...>>
|
|
{
|
|
template <typename F, typename X>
|
|
__host__ __device__ constexpr auto operator()(F&& f, X&& x) const
|
|
{
|
|
return std::forward<F>(f)(std::forward<X>(x).At(Number<Is>{})...);
|
|
}
|
|
};
|
|
|
|
template <typename Seq0, typename Seq1>
|
|
struct unpack2_impl;
|
|
|
|
// TODO: remove this, after properly implementing unpack that takes any number of containers
|
|
template <index_t... Is, index_t... Js>
|
|
struct unpack2_impl<Sequence<Is...>, Sequence<Js...>>
|
|
{
|
|
template <typename F, typename X, typename Y>
|
|
__host__ __device__ constexpr auto operator()(F&& f, X&& x, Y&& y) const
|
|
{
|
|
return std::forward<F>(f)(std::forward<X>(x).At(Number<Is>{})...,
|
|
std::forward<Y>(y).At(Number<Js>{})...);
|
|
}
|
|
};
|
|
|
|
} // namespace detail
|
|
|
|
template <typename F, typename X>
|
|
__host__ __device__ constexpr auto unpack(F&& f, X&& x)
|
|
{
|
|
using X_ = remove_reference_t<X>;
|
|
return detail::unpack_impl<typename arithmetic_sequence_gen<0, X_::Size(), 1>::type>{}(
|
|
std::forward<F>(f), std::forward<X>(x));
|
|
}
|
|
|
|
// TODO: properly implement unpack that takes any number of containers
|
|
template <typename F, typename X, typename Y>
|
|
__host__ __device__ constexpr auto unpack2(F&& f, X&& x, Y&& y)
|
|
{
|
|
using X_ = remove_reference_t<X>;
|
|
using Y_ = remove_reference_t<Y>;
|
|
return detail::unpack2_impl<typename arithmetic_sequence_gen<0, X_::Size(), 1>::type,
|
|
typename arithmetic_sequence_gen<0, Y_::Size(), 1>::type>{}(
|
|
std::forward<F>(f), std::forward<X>(x), std::forward<Y>(y));
|
|
}
|
|
|
|
} // namespace ck
|
|
#endif
|