Files
composable_kernel/driver/include/olc_driver_common.hpp
Qianfeng 1685048a67 Add online compilation for dynamic kernels (#37)
* Add online-compiling facility

* Synchronize from fwd-v4r5 and implement host interfaces to call conv-fwd v4r4/v4r5 using on-line compiling method

* Tiny adjustment to time reporting

* Use object assignment to replace explicit bytes copying in the first kernel of v4r4/v4r5

* Use single thread to assign descriptor object to device memory

* Adjust to the workload assignment of the two kernels of v4r4 (experimental)

* Revert "Adjust to the workload assignment of the two kernels of v4r4 (experimental)"

This reverts commit eb38461456bb0c82b6c0d32cdd616e181907e20c.

* Update to make constexpr for generating descriptor types in kernel 2 of dynamic conv-fwd v4r4

* Update to dynamic conv-fwd v4r4 online-compiling

* Update to dynamic conv-fwd v4r5 online-compiling (result not accurate)

* Tiny update to driver/CMakeLists.txt

* clang-format

* Tiny comments change

* Add env OLC_DUMP_SAVE_TMP_DIR to support saving of temporary dir

* Fwd v4r5 olc perf (#39)

* added hip-clang flags that fix perf issue of online compilation

* fix bug for olc fwd-v4r5-nchw

* Move constexpr and type reference statements out of the function body in conv-fwd v4r4/v4r5 kernel wrapper

* Remove printing in hip_build_utils.cpp

* Update to root CMakeLists.txt

* Revert "Move constexpr and type reference statements out of the function body in conv-fwd v4r4/v4r5 kernel wrapper"

This reverts commit 3d2c5d8ecdd8298b72d127110500ed5b38d9835c.

Co-authored-by: Chao Liu <chao.liu2@amd.com>
Co-authored-by: Chao Liu <lc.roy86@gmail.com>
Co-authored-by: root <root@dc-smc-18.amd.com>
2021-06-24 08:34:19 -05:00

115 lines
2.2 KiB
C++

#ifndef OLC_DRIVER_COMMON_HPP
#define OLC_DRIVER_COMMON_HPP
#include <half.hpp>
#include <cassert>
#include <stdexcept>
#include <vector>
// This enumeration should be kept synchronized with include/miopen.h
// Data-type tags understood by the driver helpers in this header.
// Every enumerator carries an explicit numeric value so that the numbering
// does not silently shift when entries are added or reordered.
enum appDataType_t
{
    appHalf     = 0,
    appFloat    = 1,
    appInt32    = 2,
    appInt8     = 3,
    appInt8x4   = 4,
    appBFloat16 = 5,
    appDouble   = 6,
};
namespace Driver {
// Compile-time lookup from an appDataType_t tag to a C++ element type,
// exposed as the nested alias `type`.
//
// Primary template: any tag without a dedicated specialization below
// resolves to float.
template <appDataType_t typeNum>
struct get_type_from_type_enum
{
    using type = float;
};

// appHalf -> half_float::half
template <>
struct get_type_from_type_enum<appHalf>
{
    using type = half_float::half;
};

// appFloat -> float
template <>
struct get_type_from_type_enum<appFloat>
{
    using type = float;
};

// appInt32 -> int
template <>
struct get_type_from_type_enum<appInt32>
{
    using type = int;
};

// appDouble -> double
template <>
struct get_type_from_type_enum<appDouble>
{
    using type = double;
};
// Translates a data-type tag into a one-character type id, returned as int:
//   appHalf -> 'H', appFloat -> 'F', appBFloat16 -> 'B', appDouble -> 'D',
//   appInt8 / appInt8x4 / appInt32 -> 'O'.
//
// Throws std::runtime_error when `t` holds a value that is not one of the
// enumerators handled below.
static inline int get_typeid_from_type_enum(appDataType_t t)
{
    switch(t)
    {
    case appHalf: return static_cast<int>('H');
    case appFloat: return static_cast<int>('F');
    case appBFloat16: return static_cast<int>('B');
    case appDouble: return static_cast<int>('D');
    // All integer flavours share a single id.
    case appInt8:
    case appInt8x4:
    case appInt32: return static_cast<int>('O');
    // Only reachable for values outside the handled enumerators; the message
    // no longer claims that just float/half/bfloat16 are accepted.
    default: throw std::runtime_error("Unsupported data type enum value.");
    }
}
// Generic fallback of the type -> type-id lookup.
// Reached only for types that have no explicit specialization; it never
// returns normally and always throws std::runtime_error.
template <typename T>
static inline int get_typeid_from_type()
{
    throw std::runtime_error{"Unsupported typeid conversion for this type!"};
}
// float -> 'F'
template <>
inline int get_typeid_from_type<float>()
{
    const int id = static_cast<int>('F');
    return id;
}
// half_float::half -> 'H'
template <>
inline int get_typeid_from_type<half_float::half>()
{
    const int id = static_cast<int>('H');
    return id;
}
// double -> 'D'
template <>
inline int get_typeid_from_type<double>()
{
    const int id = static_cast<int>('D');
    return id;
}
// Returns the average of `values` after discarding one occurrence of the
// largest element: (sum - max) / (count - 1).
//
// - A single-element input is returned as is.
// - An empty input is a precondition violation (asserted); when assertions
//   are compiled out the function returns 0.0f.
//
// The vector is only read, so it is taken by const reference.
static inline float get_effective_average(const std::vector<float>& values)
{
    assert(!values.empty());

    // Keeps NDEBUG builds well-defined: values.front() below must not be
    // evaluated on an empty vector.
    if(values.empty())
        return 0.0f;

    if(values.size() == 1)
        return values.front();

    float sum = 0.0f;
    // Seed the maximum from the data rather than from 0 so that inputs
    // consisting solely of negative numbers still have their largest
    // element discarded.
    float maxVal = values.front();

    for(const float val : values)
    {
        if(maxVal < val)
            maxVal = val;
        sum += val;
    }

    return (sum - maxVal) / static_cast<float>(values.size() - 1);
}
} // namespace Driver
#endif