mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
* init for v4r4 xdlops olc * refactor wrap * init impl of v4r4 nchw xdlops olc * tuning * test perf * fixed v4r4 nhwc * tuned v4r4 nhwc * use gridwise_gemm_xdlops_v2r3 * swap a/b * add pointer support into offline v2r3 * debugging v4r4r4 transform for olc * change timer of olc * refactor v4r4 xdlops nchw olc * remove transform fun in v4r4 xdlops nhwc olc Co-authored-by: Chao Liu <chao.liu2@amd.com>
45 lines
982 B
C++
45 lines
982 B
C++
// Umbrella header: a single include that pulls in the common utility headers
// listed below, followed by optional AMD-specific headers that are selected
// by the CK_USE_AMD_* preprocessor switches.
//
// The include order is kept exactly as written; later headers may rely on
// declarations from earlier ones (NOTE(review): not verifiable from this file
// alone -- confirm before reordering).
#ifndef CK_COMMON_HEADER_HPP
#define CK_COMMON_HEADER_HPP

#include "config.hpp"
#include "array.hpp"
#include "container_helper.hpp"
#include "statically_indexed_array.hpp"
#include "container_element_picker.hpp"
#include "multi_index.hpp"
#include "data_type.hpp"
#include "float_type.hpp"
#include "functional.hpp"
#include "functional2.hpp"
#include "functional3.hpp"
#include "functional4.hpp"
#include "integral_constant.hpp"
#include "math.hpp"
#include "number.hpp"
#include "sequence.hpp"
#include "sequence_helper.hpp"
#include "synchronization.hpp"
#include "tuple.hpp"
#include "tuple_helper.hpp"
#include "type.hpp"
#include "utility.hpp"
#include "magic_division.hpp"
#include "amd_buffer_addressing_v2.hpp"
#include "static_buffer.hpp"
#include "dynamic_buffer.hpp"

// The CK_USE_AMD_* switches tested below are not defined in this file; they
// are presumably provided by config.hpp or by the build system -- confirm.
// A switch that is undefined evaluates to 0 inside #if, so the corresponding
// header is simply skipped.

// TODO: remove this
// NOTE(review): "this" presumably refers to the inline-asm include that
// follows -- confirm with the original author.
#if CK_USE_AMD_INLINE_ASM
#include "amd_inline_asm.hpp"
#endif // CK_USE_AMD_INLINE_ASM

#if CK_USE_AMD_DLOP
#include "amd_dlop.hpp"
#endif // CK_USE_AMD_DLOP

#if CK_USE_AMD_XDLOPS
#include "amd_xdlops.hpp"
#endif // CK_USE_AMD_XDLOPS

#endif // CK_COMMON_HEADER_HPP