Files
composable_kernel/host/driver_offline/CMakeLists.txt
zjing14 970fa3e92e v5r1 fusion kernels for inference (#49)
* init

* refactor for 1x1

* rename e0_e1

* add e1 with bugs

* debug

* fixed

* fixed e1

* add timer

* imprve threadwise gemm with dot2

* add e2

* tuning

* seperate c2

* add nhwc

* restore nchwc

* clean

* opt

* fixed; tuning

* add BGlobalMoveSliceWindowStepHacks{}

* tuning

* repeat running

* adjust

* merge v5r1 nchwc

* add adaptors

* split k0 k1 in c_thread_grid

* split h and w

* remove v5r1 nhwc

* clean for pr

* remove host_conv_add

* clean code

* clean

* add dynamic support

* static mode

* test static

* add conv+add fusion

* fixed validation

* naming fix

* use activ_enum

* make static

* refactor conv_add for InMem::add

* add bias

* add conv_out

* add configurable makeddesc

* add maxpool fusion

* add maxpool host for validation

* enable static desc

* conv-only use v5r1_add

* test

* test

* for binary dumps

* fixed incorrect results due to typo

* clean

* debugging maxpool

* workaround with offset trick

* clean code

* modularize ops of fusion

* add gridwise_gemm_v3

* create seperate fusion fun

* enable dynamic mode of conv and conv+resize_add

* add dynamic mode of maxpool

* add pass by point

* add activ_type as arguments

* merge develop

* clean

* reset config to old default

Co-authored-by: Chao Liu <chao.liu2@amd.com>
2021-11-18 08:34:07 -06:00

38 lines
2.2 KiB
CMake

include_directories(BEFORE
include
${PROJECT_SOURCE_DIR}/host/host_tensor/include
${PROJECT_SOURCE_DIR}/host/device/include
${PROJECT_SOURCE_DIR}/host/solver/include
${PROJECT_SOURCE_DIR}/composable_kernel/include
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation
${PROJECT_SOURCE_DIR}/composable_kernel/include/problem_transform
${PROJECT_SOURCE_DIR}/composable_kernel/include/driver
${PROJECT_SOURCE_DIR}/external/rocm/include
)
set(CONV_FWD_DRIVER_OFFLINE_SOURCE src/conv_fwd_driver_offline.cpp)
set(CONV_FWD_DRIVER_OFFLINE_NCHWC_SOURCE src/conv_fwd_driver_offline_nchwc.cpp)
set(CONV_ADD_FWD_DRIVER_OFFLINE_NCHWC_SOURCE src/conv_add_fwd_driver_offline_nchwc.cpp)
set(CONV_MAXPOOL_FWD_DRIVER_OFFLINE_NCHWC_SOURCE src/conv_maxpool_fwd_driver_offline_nchwc.cpp)
set(CONV_BWD_DRIVER_OFFLINE_SOURCE src/conv_bwd_driver_offline.cpp)
set(CONV_WRW_DRIVER_OFFLINE_SOURCE src/conv_wrw_driver_offline.cpp)
set(GEMM_DRIVER_OFFLINE_SOURCE src/gemm_driver_offline.cpp)
add_executable(conv_fwd_driver_offline ${CONV_FWD_DRIVER_OFFLINE_SOURCE})
add_executable(conv_fwd_driver_offline_nchwc ${CONV_FWD_DRIVER_OFFLINE_NCHWC_SOURCE})
add_executable(conv_add_fwd_driver_offline_nchwc ${CONV_ADD_FWD_DRIVER_OFFLINE_NCHWC_SOURCE})
add_executable(conv_maxpool_fwd_driver_offline_nchwc ${CONV_MAXPOOL_FWD_DRIVER_OFFLINE_NCHWC_SOURCE})
add_executable(conv_bwd_driver_offline ${CONV_BWD_DRIVER_OFFLINE_SOURCE})
add_executable(conv_wrw_driver_offline ${CONV_WRW_DRIVER_OFFLINE_SOURCE})
add_executable(gemm_driver_offline ${GEMM_DRIVER_OFFLINE_SOURCE})
target_link_libraries(conv_fwd_driver_offline PRIVATE host_tensor)
target_link_libraries(conv_fwd_driver_offline_nchwc PRIVATE host_tensor)
target_link_libraries(conv_add_fwd_driver_offline_nchwc PRIVATE host_tensor)
target_link_libraries(conv_maxpool_fwd_driver_offline_nchwc PRIVATE host_tensor)
target_link_libraries(conv_bwd_driver_offline PRIVATE host_tensor)
target_link_libraries(conv_wrw_driver_offline PRIVATE host_tensor)
target_link_libraries(gemm_driver_offline PRIVATE host_tensor)