mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 17:26:00 +00:00
* Add online-compiling facility * Synchronize from fwd-v4r5 and implement host interfaces to call conv-fwd v4r4/v4r5 using on-line compiling method * Tiny adjustment to time reporting * Use object assignment to replace explicit bytes copying in the first kernel of v4r4/v4r5 * Use single thread to assign descriptor object to device memory * Adjust to the workload assignment of the two kernels of v4r4 (experimental) * Revert "Adjust to the workload assignment of the two kernels of v4r4 (experimental)" This reverts commit eb38461456bb0c82b6c0d32cdd616e181907e20c. * Update to make constexpr for generating descriptor types in kernel 2 of dynamic conv-fwd v4r4 * Update to dynamic conv-fwd v4r4 online-compiling * Update to dynamic conv-fwd v4r5 online-compiling (result not accurate) * Tiny update to driver/CMakeLists.txt * clang-format * Tiny comments change * Add env OLC_DUMP_SAVE_TMP_DIR to support saving of temporary dir * Fwd v4r5 olc perf (#39) * added hip-clang flags that fix perf issue of online compilation * fix bug for olc fwd-v4r5-nchw * Move constexpr and type reference statements out of the function body in conv-fwd v4r4/v4r5 kernel wrapper * Remove printing in hip_build_utils.cpp * Update to root CMakeLists.txt * Revert "Move constexpr and type reference statements out of the function body in conv-fwd v4r4/v4r5 kernel wrapper" This reverts commit 3d2c5d8ecdd8298b72d127110500ed5b38d9835c. Co-authored-by: Chao Liu <chao.liu2@amd.com> Co-authored-by: Chao Liu <lc.roy86@gmail.com> Co-authored-by: root <root@dc-smc-18.amd.com>
36 lines
944 B
C++
36 lines
944 B
C++
#ifndef OLC_GUARD_MLOPEN_OP_KERNEL_ARGS_HPP
|
|
#define OLC_GUARD_MLOPEN_OP_KERNEL_ARGS_HPP
|
|
|
|
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

#include <half.hpp>
#include <boost/container/small_vector.hpp>
|
|
struct OpKernelArg
|
|
{
|
|
|
|
OpKernelArg(char val, size_t sz) : buffer(sz) { std::fill(buffer.begin(), buffer.end(), val); }
|
|
|
|
template <typename T>
|
|
OpKernelArg(T arg) : buffer(sizeof(T))
|
|
{
|
|
static_assert(std::is_trivial<T>{} || std::is_same<T, half_float::half>{},
|
|
"Only for trivial types");
|
|
*(reinterpret_cast<T*>(buffer.data())) = arg;
|
|
}
|
|
|
|
template <typename T>
|
|
OpKernelArg(T* arg) // NOLINT
|
|
: buffer(sizeof(T*))
|
|
{
|
|
*(reinterpret_cast<T**>(buffer.data())) = arg;
|
|
is_ptr = true;
|
|
}
|
|
|
|
std::size_t size() const { return buffer.size(); };
|
|
boost::container::small_vector<char, 8> buffer;
|
|
bool is_ptr = false;
|
|
};
|
|
|
|
#endif
|