From 1566b31736d191fe3a43dd5efa59968e44191729 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Thu, 13 Jun 2019 15:12:12 -0500 Subject: [PATCH] reorginzed files --- CMakeLists.txt | 17 ++++++++++--- .../gridwise_convolution_kernel_wrapper.hpp | 0 ...e_convolution_direct_v2_nchw_kcyx_nkhw.hpp | 12 +++++----- ...tion_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp | 16 ++++++------- ...tion_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp | 18 +++++++------- ...tion_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp | 16 ++++++------- ..._v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp | 16 ++++++------- ...tion_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp | 16 ++++++------- ..._v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp | 16 ++++++------- ...lution_implicit_gemm_v2_chwn_cyxk_khwn.hpp | 12 +++++----- ...mm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp | 14 +++++------ ...lution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp | 12 +++++----- ...mm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp | 12 +++++----- ...lution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp | 14 +++++------ ...mm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp | 24 +++++++++++-------- ...onvolution_2_vectorized_nchw_kcyx_nkhw.hpp | 14 +++++------ ...mm_convolution_1_chwn_cyxk_khwn_padded.hpp | 14 +++++------ .../ConstantMatrixDescriptor.hpp | 2 +- .../ConstantMergedTensorDescriptor.hpp | 4 ++-- .../ConstantTensorDescriptor.hpp | 2 +- .../blockwise_2d_tensor_op.hpp | 4 ++-- .../blockwise_3d_tensor_op.hpp | 4 ++-- .../blockwise_4d_tensor_op.hpp | 5 ++-- .../blockwise_batched_gemm.hpp | 4 +++- .../tensor_operation/blockwise_gemm.hpp | 5 ++-- .../blockwise_generic_tensor_slice_copy.hpp | 5 +++- .../blockwise_tensor_slice_copy.hpp | 4 +++- .../threadwise_4d_tensor_op.hpp | 3 ++- .../threadwise_direct_convolution.hpp | 5 ++-- .../tensor_operation/threadwise_gemm.hpp | 4 ++-- .../threadwise_generic_tensor_op.hpp | 5 ++-- .../threadwise_generic_tensor_slice_copy.hpp | 5 ++-- .../threadwise_tensor_slice_copy.hpp | 3 ++- .../include}/utility/Array.hpp | 4 ++-- .../include}/utility/Sequence.hpp | 4 ++-- .../include}/utility/amd_inline_asm.hpp | 2 +- .../include/utility/common_header.hpp | 18 ++++++++++++++ .../include}/utility/config_amd.hpp.in | 4 ++-- .../include}/utility/config_nvidia.hpp.in | 4 ++-- .../include}/utility/functional.hpp | 4 ++-- .../include}/utility/functional2.hpp | 4 ++-- .../include}/utility/functional3.hpp | 8 +++---- .../include}/utility/integral_constant.hpp | 0 .../include}/utility/utility.hpp | 2 ++ .../include}/utility/vector_type.hpp | 4 ++-- driver/CMakeLists.txt | 20 ++++++++++++++-- {include => driver/include}/conv_common.hpp | 2 +- {include => driver/include}/device.hpp | 2 +- ...e_convolution_direct_v2_nchw_kcyx_nkhw.hpp | 3 ++- ...lution_implicit_gemm_v1_chwn_cyxk_khwn.hpp | 9 +++---- ...lution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp | 5 ++-- ...lution_implicit_gemm_v2_chwn_cyxk_khwn.hpp | 5 ++-- ...lution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp | 5 ++-- ...lution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp | 5 ++-- ...onvolution_2_vectorized_nchw_kcyx_nkhw.hpp | 3 ++- ...mm_convolution_1_chwn_cyxk_khwn_padded.hpp | 3 ++- {include => driver/include}/tensor.hpp | 0 driver/src/CMakeLists.txt | 0 {src => driver/src}/device.cpp | 2 +- driver/{ => src}/driver.cpp | 6 ++--- driver/{ => src}/driver.cu | 0 {src => driver/src}/tensor.cpp | 0 include/composable_kernel/utility/common.hpp | 17 ------------- src/CMakeLists.txt | 20 ---------------- 64 files changed, 254 insertions(+), 218 deletions(-) rename {include => composable_kernel/include}/gridwise_convolution_kernel_wrapper.hpp (100%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp (96%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp (96%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp (96%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp (96%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp (95%) rename {include/composable_kernel => composable_kernel/include}/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp (96%) rename {include/composable_kernel => composable_kernel/include}/tensor_description/ConstantMatrixDescriptor.hpp (97%) rename {include/composable_kernel => composable_kernel/include}/tensor_description/ConstantMergedTensorDescriptor.hpp (98%) rename {include/composable_kernel => composable_kernel/include}/tensor_description/ConstantTensorDescriptor.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/blockwise_2d_tensor_op.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/blockwise_3d_tensor_op.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/blockwise_4d_tensor_op.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/blockwise_batched_gemm.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/blockwise_gemm.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/blockwise_generic_tensor_slice_copy.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/blockwise_tensor_slice_copy.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/threadwise_4d_tensor_op.hpp (95%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/threadwise_direct_convolution.hpp (98%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/threadwise_gemm.hpp (96%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/threadwise_generic_tensor_op.hpp (73%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/threadwise_generic_tensor_slice_copy.hpp (96%) rename {include/composable_kernel => composable_kernel/include}/tensor_operation/threadwise_tensor_slice_copy.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/utility/Array.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/utility/Sequence.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/utility/amd_inline_asm.hpp (99%) create mode 100644 composable_kernel/include/utility/common_header.hpp rename {include/composable_kernel => composable_kernel/include}/utility/config_amd.hpp.in (94%) rename {include/composable_kernel => composable_kernel/include}/utility/config_nvidia.hpp.in (95%) rename {include/composable_kernel => composable_kernel/include}/utility/functional.hpp (93%) rename {include/composable_kernel => composable_kernel/include}/utility/functional2.hpp (93%) rename {include/composable_kernel => composable_kernel/include}/utility/functional3.hpp (93%) rename {include/composable_kernel => composable_kernel/include}/utility/integral_constant.hpp (100%) rename {include/composable_kernel => composable_kernel/include}/utility/utility.hpp (99%) rename {include/composable_kernel => composable_kernel/include}/utility/vector_type.hpp (96%) rename {include => driver/include}/conv_common.hpp (98%) rename {include => driver/include}/device.hpp (96%) rename driver/{ => include}/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp (97%) rename driver/{ => include}/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp (97%) rename driver/{ => include}/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp (98%) rename driver/{ => include}/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp (98%) rename driver/{ => include}/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp (96%) rename driver/{ => include}/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp (96%) rename driver/{ => include}/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp (98%) rename driver/{ => include}/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp (99%) rename {include => driver/include}/tensor.hpp (100%) create mode 100644 driver/src/CMakeLists.txt rename {src => driver/src}/device.cpp (98%) rename driver/{ => src}/driver.cpp (99%) rename driver/{ => src}/driver.cu (100%) rename {src => driver/src}/tensor.cpp (100%) delete mode 100644 include/composable_kernel/utility/common.hpp delete mode 100644 src/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cbca2bb77..55727cc7c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,8 +46,19 @@ endif() # include_directories(BEFORE - include - ${PROJECT_BINARY_DIR}/include + ${PROJECT_SOURCE_DIR}/composable_kernel/include + ${PROJECT_SOURCE_DIR}/composable_kernel/include/utility + ${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description + ${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation + ${PROJECT_SOURCE_DIR}/composable_kernel/include/kernel_algorithm + ${PROJECT_SOURCE_DIR}/driver/include + ${PROJECT_BINARY_DIR}/composable_kernel/include/utility ) -add_subdirectory(src) + +if(DEVICE_BACKEND STREQUAL "AMD") + configure_file("${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/config_amd.hpp.in" "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/config.hpp") +elseif(DEVICE_BACKEND STREQUAL "NVIDIA") + configure_file("${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/config_nvidia.hpp.in" "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/config.hpp") +endif() + add_subdirectory(driver) diff --git a/include/gridwise_convolution_kernel_wrapper.hpp b/composable_kernel/include/gridwise_convolution_kernel_wrapper.hpp similarity index 100% rename from include/gridwise_convolution_kernel_wrapper.hpp rename to composable_kernel/include/gridwise_convolution_kernel_wrapper.hpp diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp index f788bdd093..4958353849 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp @@ -1,12 +1,12 @@ #ifndef CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW #define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_direct_convolution.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp index 711f3464b6..eca22ce632 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp index 6a3471c7f6..23c1be5272 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp @@ -1,15 +1,15 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_3d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp index 5f6ba76d4b..c54eb80dd9 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp index 3f96d2de9d..f421cfa4c3 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp index a685727b50..447ce4ce52 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_tensor_slice_copy.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_generic_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp index d531f5c0b2..3c71a5afc4 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_tensor_slice_copy.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_generic_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp index dd7e02e715..a1525e74ee 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp @@ -1,12 +1,12 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp index 9839f7de24..31832b2ef9 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp @@ -1,13 +1,13 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp index 67440af681..d1b77f06e7 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp @@ -1,12 +1,12 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_generic_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp index d3cdb640ff..0d5b520c53 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp @@ -1,12 +1,12 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_generic_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp index c97133dfad..f6535b3d7b 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp @@ -1,13 +1,13 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_generic_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" +#include "threadwise_generic_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp index d6badfda8b..38e9360016 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp @@ -1,13 +1,17 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_generic_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" +#include "threadwise_generic_tensor_slice_copy.hpp" + +#ifndef CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM +#define CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM 1 +#endif namespace ck { @@ -233,10 +237,10 @@ struct GridwiseConvolutionImplicitGemm_v4_nchw_kcyx_nkhw_lds_double_buffer // choose GEMM implementation here const auto run_blockwise_gemm = [&](auto... Xs) { -#if 1 - return blockwise_gemm.Run(Xs...); -#else +#if CK_USE_AMD_INLINE_ASM && CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM return blockwise_gemm.Run_asm(Xs...); +#else + return blockwise_gemm.Run(Xs...); #endif }; diff --git a/include/composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp similarity index 95% rename from include/composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp index e160ef8686..00cf53c361 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp @@ -1,11 +1,11 @@ #pragma once -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_direct_convolution.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_direct_convolution.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "threadwise_direct_convolution.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp b/composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp index c4dfb03dac..58ecc244a6 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp @@ -1,11 +1,11 @@ #pragma once -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp b/composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp similarity index 97% rename from include/composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp rename to composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp index 36b6efc467..e24f16aa16 100644 --- a/include/composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp +++ b/composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp @@ -1,7 +1,7 @@ #ifndef CK_CONSTANT_MATRIX_DESCRIPTOR_HPP #define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP -#include "composable_kernel/utility/common.hpp" +#include "common_header.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp b/composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor.hpp similarity index 98% rename from include/composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp rename to composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor.hpp index 600577b07b..757f0ad691 100644 --- a/include/composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp +++ b/composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor.hpp @@ -1,8 +1,8 @@ #ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP #define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_description/ConstantTensorDescriptor.hpp b/composable_kernel/include/tensor_description/ConstantTensorDescriptor.hpp similarity index 99% rename from include/composable_kernel/tensor_description/ConstantTensorDescriptor.hpp rename to composable_kernel/include/tensor_description/ConstantTensorDescriptor.hpp index c9415cd9d4..76c3761d10 100644 --- a/include/composable_kernel/tensor_description/ConstantTensorDescriptor.hpp +++ b/composable_kernel/include/tensor_description/ConstantTensorDescriptor.hpp @@ -1,7 +1,7 @@ #ifndef CK_CONSTANT_TENSOR_DESCRIPTOR_HPP #define CK_CONSTANT_TENSOR_DESCRIPTOR_HPP -#include "composable_kernel/utility/common.hpp" +#include "common_header.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp b/composable_kernel/include/tensor_operation/blockwise_2d_tensor_op.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp rename to composable_kernel/include/tensor_operation/blockwise_2d_tensor_op.hpp index 09ef7476cf..9354da7861 100644 --- a/include/composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_2d_tensor_op.hpp @@ -1,8 +1,8 @@ #ifndef CK_BLOCKWISE_2D_TENSOR_OP_HPP #define CK_BLOCKWISE_2D_TENSOR_OP_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp b/composable_kernel/include/tensor_operation/blockwise_3d_tensor_op.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp rename to composable_kernel/include/tensor_operation/blockwise_3d_tensor_op.hpp index e17972a1a9..fb2eafa160 100644 --- a/include/composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_3d_tensor_op.hpp @@ -1,8 +1,8 @@ #ifndef CK_BLOCKWISE_3D_TENSOR_OP_HPP #define CK_BLOCKWISE_3D_TENSOR_OP_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp b/composable_kernel/include/tensor_operation/blockwise_4d_tensor_op.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp rename to composable_kernel/include/tensor_operation/blockwise_4d_tensor_op.hpp index bad94e6f35..6f65ce077d 100644 --- a/include/composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_4d_tensor_op.hpp @@ -1,8 +1,9 @@ #ifndef CK_BLOCKWISE_4D_TENSOR_OP_HPP #define CK_BLOCKWISE_4D_TENSOR_OP_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "threadwise_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_batched_gemm.hpp b/composable_kernel/include/tensor_operation/blockwise_batched_gemm.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_batched_gemm.hpp rename to composable_kernel/include/tensor_operation/blockwise_batched_gemm.hpp index db51bb0b1d..0b1c9e2a53 100644 --- a/include/composable_kernel/tensor_operation/blockwise_batched_gemm.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_batched_gemm.hpp @@ -1,7 +1,9 @@ #ifndef CK_BLOCKWISE_BATCHED_GEMM_HPP #define CK_BLOCKWISE_BATCHED_GEMM_HPP -#include "composable_kernel/tensor_operation/threadwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "threadwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_gemm.hpp b/composable_kernel/include/tensor_operation/blockwise_gemm.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_gemm.hpp rename to composable_kernel/include/tensor_operation/blockwise_gemm.hpp index c045c32c03..f257137f3c 100644 --- a/include/composable_kernel/tensor_operation/blockwise_gemm.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_gemm.hpp @@ -1,8 +1,9 @@ #ifndef CK_BLOCKWISE_GEMM_HPP #define CK_BLOCKWISE_GEMM_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_operation/threadwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "threadwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp b/composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp rename to composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp index 1381b7ef14..1496630543 100644 --- a/include/composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp @@ -1,7 +1,10 @@ #ifndef CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP #define CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "threadwise_generic_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp b/composable_kernel/include/tensor_operation/blockwise_tensor_slice_copy.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp rename to composable_kernel/include/tensor_operation/blockwise_tensor_slice_copy.hpp index d4576279a8..2de6cf1d45 100644 --- a/include/composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_tensor_slice_copy.hpp @@ -1,7 +1,9 @@ #ifndef CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP #define CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "threadwise_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp b/composable_kernel/include/tensor_operation/threadwise_4d_tensor_op.hpp similarity index 95% rename from include/composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp rename to composable_kernel/include/tensor_operation/threadwise_4d_tensor_op.hpp index 1af6e4dc34..a8d0398de1 100644 --- a/include/composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_4d_tensor_op.hpp @@ -1,7 +1,8 @@ #ifndef CK_THREADWISE_4D_TENSOR_OP_HPP #define CK_THREADWISE_4D_TENSOR_OP_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_direct_convolution.hpp b/composable_kernel/include/tensor_operation/threadwise_direct_convolution.hpp similarity index 98% rename from include/composable_kernel/tensor_operation/threadwise_direct_convolution.hpp rename to composable_kernel/include/tensor_operation/threadwise_direct_convolution.hpp index 0332dd9d3d..3e84cbd8b7 100644 --- a/include/composable_kernel/tensor_operation/threadwise_direct_convolution.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_direct_convolution.hpp @@ -1,8 +1,9 @@ #ifndef CK_THREADWISE_DIRECT_CONVOLUTION_HPP #define CK_THREADWISE_DIRECT_CONVOLUTION_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "threadwise_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_gemm.hpp b/composable_kernel/include/tensor_operation/threadwise_gemm.hpp similarity index 96% rename from include/composable_kernel/tensor_operation/threadwise_gemm.hpp rename to composable_kernel/include/tensor_operation/threadwise_gemm.hpp index 5610b65e09..ea77027c10 100644 --- a/include/composable_kernel/tensor_operation/threadwise_gemm.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_gemm.hpp @@ -1,8 +1,8 @@ #ifndef CK_THREADWISE_GEMM_HPP #define CK_THREADWISE_GEMM_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantMatrixDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_op.hpp similarity index 73% rename from include/composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp rename to composable_kernel/include/tensor_operation/threadwise_generic_tensor_op.hpp index beca415604..c0b4e89391 100644 --- a/include/composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_op.hpp @@ -1,8 +1,9 @@ #ifndef CK_THREADWISE_GENERIC_TENSOR_OP_HPP #define CK_THREADWISE_GENERIC_TENSOR_OP_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" namespace ck { template diff --git a/include/composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp similarity index 96% rename from include/composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp rename to composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp index 588766b477..2dd7b79ab2 100644 --- a/include/composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp @@ -1,8 +1,9 @@ #ifndef CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP #define CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp b/composable_kernel/include/tensor_operation/threadwise_tensor_slice_copy.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp rename to composable_kernel/include/tensor_operation/threadwise_tensor_slice_copy.hpp index d48eed6ad1..8d0ce26f94 100644 --- a/include/composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_tensor_slice_copy.hpp @@ -1,7 +1,8 @@ #ifndef CK_THREADWISE_TENSOR_SLICE_COPY_HPP #define CK_THREADWISE_TENSOR_SLICE_COPY_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/utility/Array.hpp b/composable_kernel/include/utility/Array.hpp similarity index 99% rename from include/composable_kernel/utility/Array.hpp rename to composable_kernel/include/utility/Array.hpp index f67f5a6cde..fcf87c5843 100644 --- a/include/composable_kernel/utility/Array.hpp +++ b/composable_kernel/include/utility/Array.hpp @@ -1,8 +1,8 @@ #ifndef CK_ARRAY_HPP #define CK_ARRAY_HPP -#include "composable_kernel/utility/Sequence.hpp" -#include "composable_kernel/utility/functional2.hpp" +#include "Sequence.hpp" +#include "functional2.hpp" namespace ck { diff --git a/include/composable_kernel/utility/Sequence.hpp b/composable_kernel/include/utility/Sequence.hpp similarity index 99% rename from include/composable_kernel/utility/Sequence.hpp rename to composable_kernel/include/utility/Sequence.hpp index 70c3a5dc39..09d67c13fa 100644 --- a/include/composable_kernel/utility/Sequence.hpp +++ b/composable_kernel/include/utility/Sequence.hpp @@ -1,8 +1,8 @@ #ifndef CK_SEQUENCE_HPP #define CK_SEQUENCE_HPP -#include "composable_kernel/utility/integral_constant.hpp" -#include "composable_kernel/utility/functional.hpp" +#include "integral_constant.hpp" +#include "functional.hpp" namespace ck { diff --git a/include/composable_kernel/utility/amd_inline_asm.hpp b/composable_kernel/include/utility/amd_inline_asm.hpp similarity index 99% rename from include/composable_kernel/utility/amd_inline_asm.hpp rename to composable_kernel/include/utility/amd_inline_asm.hpp index 344bc87063..e82cd62db8 100644 --- a/include/composable_kernel/utility/amd_inline_asm.hpp +++ b/composable_kernel/include/utility/amd_inline_asm.hpp @@ -1,7 +1,7 @@ #ifndef CK_AMD_INLINE_ASM_HPP #define CK_AMD_INLINE_ASM_HPP -#include "composable_kernel/utility/vector_type.hpp" +#include "vector_type.hpp" #define NO_VM_WAIT 0 #define NO_LGKM_WAIT 0 diff --git a/composable_kernel/include/utility/common_header.hpp b/composable_kernel/include/utility/common_header.hpp new file mode 100644 index 0000000000..1c8dcbd521 --- /dev/null +++ b/composable_kernel/include/utility/common_header.hpp @@ -0,0 +1,18 @@ +#ifndef CK_COMMON_HPP +#define CK_COMMON_HPP + +#include "config.hpp" +#include "utility.hpp" +#include "vector_type.hpp" +#include "integral_constant.hpp" +#include "Sequence.hpp" +#include "Array.hpp" +#include "functional.hpp" +#include "functional2.hpp" +#include "functional3.hpp" + +#if CK_USE_AMD_INLINE_ASM +#include "amd_inline_asm.hpp" +#endif + +#endif diff --git a/include/composable_kernel/utility/config_amd.hpp.in b/composable_kernel/include/utility/config_amd.hpp.in similarity index 94% rename from include/composable_kernel/utility/config_amd.hpp.in rename to composable_kernel/include/utility/config_amd.hpp.in index d6ac44ab24..4d840df7fe 100644 --- a/include/composable_kernel/utility/config_amd.hpp.in +++ b/composable_kernel/include/utility/config_amd.hpp.in @@ -1,5 +1,5 @@ -#ifndef CK_CONFIG_HPP -#define CK_CONFIG_HPP +#ifndef CK_CONFIG_AMD_HPP +#define CK_CONFIG_AMD_HPP #cmakedefine01 CK_DEVICE_BACKEND_AMD diff --git a/include/composable_kernel/utility/config_nvidia.hpp.in b/composable_kernel/include/utility/config_nvidia.hpp.in similarity index 95% rename from include/composable_kernel/utility/config_nvidia.hpp.in rename to composable_kernel/include/utility/config_nvidia.hpp.in index a4d4bcaae7..0f9e3441ea 100644 --- a/include/composable_kernel/utility/config_nvidia.hpp.in +++ b/composable_kernel/include/utility/config_nvidia.hpp.in @@ -1,5 +1,5 @@ -#ifndef CK_CONFIG_CUDA_HPP -#define CK_CONFIG_CUDA_HPP +#ifndef CK_CONFIG_NVIDIA_HPP +#define CK_CONFIG_NVIDIA_HPP #cmakedefine01 CK_DEVICE_BACKEND_NVIDIA diff --git a/include/composable_kernel/utility/functional.hpp b/composable_kernel/include/utility/functional.hpp similarity index 93% rename from include/composable_kernel/utility/functional.hpp rename to composable_kernel/include/utility/functional.hpp index 92102ca418..920f11af2a 100644 --- a/include/composable_kernel/utility/functional.hpp +++ b/composable_kernel/include/utility/functional.hpp @@ -1,8 +1,8 @@ #ifndef CK_FUNCTIONAL_HPP #define CK_FUNCTIONAL_HPP -#include "composable_kernel/utility/integral_constant.hpp" -#include "composable_kernel/utility/Sequence.hpp" +#include "integral_constant.hpp" +#include "Sequence.hpp" namespace ck { diff --git a/include/composable_kernel/utility/functional2.hpp b/composable_kernel/include/utility/functional2.hpp similarity index 93% rename from include/composable_kernel/utility/functional2.hpp rename to composable_kernel/include/utility/functional2.hpp index 6d5e38d2ea..3820056593 100644 --- a/include/composable_kernel/utility/functional2.hpp +++ b/composable_kernel/include/utility/functional2.hpp @@ -1,8 +1,8 @@ #ifndef CK_FUNCTIONAL2_HPP #define CK_FUNCTIONAL2_HPP -#include "composable_kernel/utility/functional.hpp" -#include "composable_kernel/utility/Sequence.hpp" +#include "functional.hpp" +#include "Sequence.hpp" namespace ck { diff --git a/include/composable_kernel/utility/functional3.hpp b/composable_kernel/include/utility/functional3.hpp similarity index 93% rename from include/composable_kernel/utility/functional3.hpp rename to composable_kernel/include/utility/functional3.hpp index 769f1bea0b..fc5f8a6bab 100644 --- a/include/composable_kernel/utility/functional3.hpp +++ b/composable_kernel/include/utility/functional3.hpp @@ -1,10 +1,10 @@ #ifndef CK_FUNCTIONAL3_HPP #define CK_FUNCTIONAL3_HPP -#include "composable_kernel/utility/functional.hpp" -#include "composable_kernel/utility/functional2.hpp" -#include "composable_kernel/utility/Sequence.hpp" -#include "composable_kernel/utility/Array.hpp" +#include "functional.hpp" +#include "functional2.hpp" +#include "Sequence.hpp" +#include "Array.hpp" namespace ck { diff --git a/include/composable_kernel/utility/integral_constant.hpp b/composable_kernel/include/utility/integral_constant.hpp similarity index 100% rename from include/composable_kernel/utility/integral_constant.hpp rename to composable_kernel/include/utility/integral_constant.hpp diff --git a/include/composable_kernel/utility/utility.hpp b/composable_kernel/include/utility/utility.hpp similarity index 99% rename from include/composable_kernel/utility/utility.hpp rename to composable_kernel/include/utility/utility.hpp index ed834b0df0..9d32c2be55 100644 --- a/include/composable_kernel/utility/utility.hpp +++ b/composable_kernel/include/utility/utility.hpp @@ -1,6 +1,8 @@ #ifndef CK_UTILITY_HPP #define CK_UTILITY_HPP +#include "config.hpp" + namespace ck { __device__ index_t get_thread_local_1d_id() { return threadIdx.x; } diff --git a/include/composable_kernel/utility/vector_type.hpp b/composable_kernel/include/utility/vector_type.hpp similarity index 96% rename from include/composable_kernel/utility/vector_type.hpp rename to composable_kernel/include/utility/vector_type.hpp index 7c48bdb1f8..72c73068e1 100644 --- a/include/composable_kernel/utility/vector_type.hpp +++ b/composable_kernel/include/utility/vector_type.hpp @@ -1,8 +1,8 @@ #ifndef CK_VECTOR_TYPE_HPP #define CK_VECTOR_TYPE_HPP -#include "composable_kernel/utility/config.hpp" -#include "composable_kernel/utility/integral_constant.hpp" +#include "config.hpp" +#include "integral_constant.hpp" namespace ck { diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt index f9528ae211..e5a1b1f732 100644 --- a/driver/CMakeLists.txt +++ b/driver/CMakeLists.txt @@ -1,7 +1,23 @@ +set(TENSOR_SOURCE + src/tensor.cpp; + src/device.cpp; +) + +add_library(tensor SHARED ${TENSOR_SOURCE}) +target_compile_features(tensor PUBLIC) +set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON) + +if(DEVICE_BACKEND STREQUAL "NVIDIA") + target_link_libraries(tensor nvToolsExt cudart) +endif() + +install(TARGETS tensor LIBRARY DESTINATION lib) + + if(DEVICE_BACKEND STREQUAL "AMD") - set(DRIVER_SOURCE driver.cpp) + set(DRIVER_SOURCE src/driver.cpp) elseif(DEVICE_BACKEND STREQUAL "NVIDIA") - set(DRIVER_SOURCE driver.cu) + set(DRIVER_SOURCE src/driver.cu) endif() add_executable(driver ${DRIVER_SOURCE}) diff --git a/include/conv_common.hpp b/driver/include/conv_common.hpp similarity index 98% rename from include/conv_common.hpp rename to driver/include/conv_common.hpp index 9f41a1d440..254f4c5651 100644 --- a/include/conv_common.hpp +++ b/driver/include/conv_common.hpp @@ -1,7 +1,7 @@ #ifndef CK_CONV_COMMON_HPP #define CK_CONV_COMMON_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "ConstantTensorDescriptor.hpp" using namespace ck; diff --git a/include/device.hpp b/driver/include/device.hpp similarity index 96% rename from include/device.hpp rename to driver/include/device.hpp index 59ab0d0bcd..faa4019a09 100644 --- a/include/device.hpp +++ b/driver/include/device.hpp @@ -2,7 +2,7 @@ #define CK_DEVICE_HPP #include -#include "composable_kernel/utility/config.hpp" +#include "config.hpp" using namespace ck; diff --git a/driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp b/driver/include/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp similarity index 97% rename from driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp rename to driver/include/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp index de1353a4ed..e19051a9bd 100644 --- a/driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp +++ b/driver/include/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp @@ -1,8 +1,9 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp" +#include "gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp b/driver/include/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp similarity index 97% rename from driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp rename to driver/include/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp index 67d27ec273..de82858288 100644 --- a/driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp +++ b/driver/include/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp @@ -1,11 +1,12 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp b/driver/include/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp similarity index 98% rename from driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp rename to driver/include/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp index 5c2dff2fb4..03cbc204c7 100644 --- a/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp +++ b/driver/include/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp @@ -1,9 +1,10 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp b/driver/include/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp similarity index 98% rename from driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp rename to driver/include/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp index 7575d061c1..a26347d032 100644 --- a/driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp +++ b/driver/include/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp @@ -1,9 +1,10 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp b/driver/include/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp similarity index 96% rename from driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp rename to driver/include/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp index 2ac490e7fa..e4fa7ef0cd 100644 --- a/driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp +++ b/driver/include/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp @@ -1,9 +1,10 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp b/driver/include/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp similarity index 96% rename from driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp rename to driver/include/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp index 92213a3029..80a6155271 100644 --- a/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp +++ b/driver/include/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp @@ -1,9 +1,10 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp b/driver/include/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp similarity index 98% rename from driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp rename to driver/include/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp index 0911607032..c6be195213 100644 --- a/driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp +++ b/driver/include/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp @@ -1,7 +1,8 @@ #pragma once #include #include "device.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp" +#include "tensor.hpp" +#include "gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp" using namespace ck; diff --git a/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp b/driver/include/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp similarity index 99% rename from driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp rename to driver/include/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp index 98d55e1538..5611e7c612 100644 --- a/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp +++ b/driver/include/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp @@ -1,7 +1,8 @@ #pragma once #include #include "device.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp" +#include "tensor.hpp" +#include "gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp" using namespace ck; diff --git a/include/tensor.hpp b/driver/include/tensor.hpp similarity index 100% rename from include/tensor.hpp rename to driver/include/tensor.hpp diff --git a/driver/src/CMakeLists.txt b/driver/src/CMakeLists.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/device.cpp b/driver/src/device.cpp similarity index 98% rename from src/device.cpp rename to driver/src/device.cpp index ca5c22e0ab..76cb19f466 100644 --- a/src/device.cpp +++ b/driver/src/device.cpp @@ -1,4 +1,4 @@ -#include "composable_kernel/utility/config.hpp" +#include "config.hpp" #include "device.hpp" DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size) diff --git a/driver/driver.cpp b/driver/src/driver.cpp similarity index 99% rename from driver/driver.cpp rename to driver/src/driver.cpp index 4c6d9474ff..b930734c00 100644 --- a/driver/driver.cpp +++ b/driver/src/driver.cpp @@ -3,9 +3,9 @@ #include #include #include -#include "composable_kernel/utility/config.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "tensor.hpp" +#include "config.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "device.hpp" #include "conv_common.hpp" #include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp" #include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp" diff --git a/driver/driver.cu b/driver/src/driver.cu similarity index 100% rename from driver/driver.cu rename to driver/src/driver.cu diff --git a/src/tensor.cpp b/driver/src/tensor.cpp similarity index 100% rename from src/tensor.cpp rename to driver/src/tensor.cpp diff --git a/include/composable_kernel/utility/common.hpp b/include/composable_kernel/utility/common.hpp deleted file mode 100644 index 08167cbccc..0000000000 --- a/include/composable_kernel/utility/common.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef CK_COMMON_HPP -#define CK_COMMON_HPP - -#include "composable_kernel/utility/utility.hpp" -#include "composable_kernel/utility/vector_type.hpp" -#include "composable_kernel/utility/integral_constant.hpp" -#include "composable_kernel/utility/Sequence.hpp" -#include "composable_kernel/utility/Array.hpp" -#include "composable_kernel/utility/functional.hpp" -#include "composable_kernel/utility/functional2.hpp" -#include "composable_kernel/utility/functional3.hpp" - -#if CK_USE_AMD_INLINE_ASM -#include "composable_kernel/utility/amd_inline_asm.hpp" -#endif - -#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index ef8a781dd5..0000000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -if(DEVICE_BACKEND STREQUAL "AMD") - configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config_amd.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp") -elseif(DEVICE_BACKEND STREQUAL "NVIDIA") - configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config_nvidia.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp") -endif() - -set(TENSOR_SOURCE - tensor.cpp; - device.cpp; -) - -add_library(tensor SHARED ${TENSOR_SOURCE}) -target_compile_features(tensor PUBLIC) -set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON) - -if(DEVICE_BACKEND STREQUAL "NVIDIA") - target_link_libraries(tensor nvToolsExt cudart) -endif() - -install(TARGETS tensor LIBRARY DESTINATION lib)