diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cbca2bb77..55727cc7c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,8 +46,23 @@ endif() # include_directories(BEFORE - include - ${PROJECT_BINARY_DIR}/include + ${PROJECT_SOURCE_DIR}/composable_kernel/include + ${PROJECT_SOURCE_DIR}/composable_kernel/include/utility + ${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description + ${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation + ${PROJECT_SOURCE_DIR}/composable_kernel/include/kernel_algorithm + ${PROJECT_SOURCE_DIR}/driver/include + ${PROJECT_BINARY_DIR}/composable_kernel/include/utility ) -add_subdirectory(src) + +# Generate config.hpp in the build tree from the template that matches DEVICE_BACKEND. +# Stop at configure time for any other value, since no config.hpp would be generated for it. +if("${DEVICE_BACKEND}" STREQUAL "AMD") + configure_file("${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/config_amd.hpp.in" "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/config.hpp") +elseif("${DEVICE_BACKEND}" STREQUAL "NVIDIA") + configure_file("${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/config_nvidia.hpp.in" "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/config.hpp") +else() + message(FATAL_ERROR "DEVICE_BACKEND must be AMD or NVIDIA (got '${DEVICE_BACKEND}')") +endif() + add_subdirectory(driver) diff --git a/include/gridwise_convolution_kernel_wrapper.hpp b/composable_kernel/include/gridwise_convolution_kernel_wrapper.hpp similarity index 100% rename from include/gridwise_convolution_kernel_wrapper.hpp rename to composable_kernel/include/gridwise_convolution_kernel_wrapper.hpp diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp index f788bdd093..4958353849 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp +++ 
b/composable_kernel/include/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp @@ -1,12 +1,12 @@ #ifndef CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW #define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_direct_convolution.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp index 711f3464b6..eca22ce632 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" 
-#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp index 6a3471c7f6..23c1be5272 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp @@ -1,15 +1,15 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp" -#include 
"composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_3d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp index 5f6ba76d4b..c54eb80dd9 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include 
"composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp index 3f96d2de9d..f421cfa4c3 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include 
"composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp index a685727b50..447ce4ce52 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include 
"blockwise_2d_tensor_op.hpp" +#include "blockwise_tensor_slice_copy.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_generic_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp index d531f5c0b2..3c71a5afc4 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp @@ -1,14 +1,14 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include 
"blockwise_tensor_slice_copy.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "threadwise_generic_tensor_op.hpp" +#include "blockwise_batched_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp index dd7e02e715..a1525e74ee 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp @@ -1,12 +1,12 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp similarity index 97% rename from 
include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp index 9839f7de24..31832b2ef9 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp @@ -1,13 +1,13 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "threadwise_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp index 67440af681..d1b77f06e7 100644 --- 
a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp @@ -1,12 +1,12 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_generic_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp index d3cdb640ff..0d5b520c53 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp @@ -1,12 +1,12 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER #define 
CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_generic_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp index c97133dfad..f6535b3d7b 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp @@ -1,13 +1,13 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include 
"composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_generic_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" +#include "threadwise_generic_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp similarity index 97% rename from include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp index d6badfda8b..38e9360016 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp @@ -1,13 +1,17 @@ #ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER #define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" -#include 
"composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_generic_tensor_slice_copy.hpp" +#include "blockwise_gemm.hpp" +#include "threadwise_generic_tensor_slice_copy.hpp" + +#ifndef CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM +#define CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM 1 +#endif namespace ck { @@ -233,10 +237,10 @@ struct GridwiseConvolutionImplicitGemm_v4_nchw_kcyx_nkhw_lds_double_buffer // choose GEMM implementation here const auto run_blockwise_gemm = [&](auto... Xs) { -#if 1 - return blockwise_gemm.Run(Xs...); -#else +#if CK_USE_AMD_INLINE_ASM && CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM return blockwise_gemm.Run_asm(Xs...); +#else + return blockwise_gemm.Run(Xs...); #endif }; diff --git a/include/composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp b/composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp similarity index 95% rename from include/composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp index e160ef8686..00cf53c361 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp @@ -1,11 +1,11 @@ #pragma once -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_direct_convolution.hpp" 
-#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_direct_convolution.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "threadwise_direct_convolution.hpp" namespace ck { diff --git a/include/composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp b/composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp similarity index 96% rename from include/composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp rename to composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp index c4dfb03dac..58ecc244a6 100644 --- a/include/composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp @@ -1,11 +1,11 @@ #pragma once -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" -#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp" -#include "composable_kernel/tensor_operation/blockwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "blockwise_4d_tensor_op.hpp" +#include "blockwise_2d_tensor_op.hpp" +#include "threadwise_4d_tensor_op.hpp" +#include "blockwise_gemm.hpp" namespace ck { diff 
--git a/include/composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp b/composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp similarity index 97% rename from include/composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp rename to composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp index 36b6efc467..e24f16aa16 100644 --- a/include/composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp +++ b/composable_kernel/include/tensor_description/ConstantMatrixDescriptor.hpp @@ -1,7 +1,7 @@ #ifndef CK_CONSTANT_MATRIX_DESCRIPTOR_HPP #define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP -#include "composable_kernel/utility/common.hpp" +#include "common_header.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp b/composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor.hpp similarity index 98% rename from include/composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp rename to composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor.hpp index 600577b07b..757f0ad691 100644 --- a/include/composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp +++ b/composable_kernel/include/tensor_description/ConstantMergedTensorDescriptor.hpp @@ -1,8 +1,8 @@ #ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP #define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_description/ConstantTensorDescriptor.hpp b/composable_kernel/include/tensor_description/ConstantTensorDescriptor.hpp similarity index 99% rename from include/composable_kernel/tensor_description/ConstantTensorDescriptor.hpp rename to 
composable_kernel/include/tensor_description/ConstantTensorDescriptor.hpp index c9415cd9d4..76c3761d10 100644 --- a/include/composable_kernel/tensor_description/ConstantTensorDescriptor.hpp +++ b/composable_kernel/include/tensor_description/ConstantTensorDescriptor.hpp @@ -1,7 +1,7 @@ #ifndef CK_CONSTANT_TENSOR_DESCRIPTOR_HPP #define CK_CONSTANT_TENSOR_DESCRIPTOR_HPP -#include "composable_kernel/utility/common.hpp" +#include "common_header.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp b/composable_kernel/include/tensor_operation/blockwise_2d_tensor_op.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp rename to composable_kernel/include/tensor_operation/blockwise_2d_tensor_op.hpp index 09ef7476cf..9354da7861 100644 --- a/include/composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_2d_tensor_op.hpp @@ -1,8 +1,8 @@ #ifndef CK_BLOCKWISE_2D_TENSOR_OP_HPP #define CK_BLOCKWISE_2D_TENSOR_OP_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp b/composable_kernel/include/tensor_operation/blockwise_3d_tensor_op.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp rename to composable_kernel/include/tensor_operation/blockwise_3d_tensor_op.hpp index e17972a1a9..fb2eafa160 100644 --- a/include/composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_3d_tensor_op.hpp @@ -1,8 +1,8 @@ #ifndef CK_BLOCKWISE_3D_TENSOR_OP_HPP #define CK_BLOCKWISE_3D_TENSOR_OP_HPP -#include "composable_kernel/utility/common.hpp" -#include 
"composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp b/composable_kernel/include/tensor_operation/blockwise_4d_tensor_op.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp rename to composable_kernel/include/tensor_operation/blockwise_4d_tensor_op.hpp index bad94e6f35..6f65ce077d 100644 --- a/include/composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_4d_tensor_op.hpp @@ -1,8 +1,9 @@ #ifndef CK_BLOCKWISE_4D_TENSOR_OP_HPP #define CK_BLOCKWISE_4D_TENSOR_OP_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "threadwise_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_batched_gemm.hpp b/composable_kernel/include/tensor_operation/blockwise_batched_gemm.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_batched_gemm.hpp rename to composable_kernel/include/tensor_operation/blockwise_batched_gemm.hpp index db51bb0b1d..0b1c9e2a53 100644 --- a/include/composable_kernel/tensor_operation/blockwise_batched_gemm.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_batched_gemm.hpp @@ -1,7 +1,9 @@ #ifndef CK_BLOCKWISE_BATCHED_GEMM_HPP #define CK_BLOCKWISE_BATCHED_GEMM_HPP -#include "composable_kernel/tensor_operation/threadwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "threadwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_gemm.hpp 
b/composable_kernel/include/tensor_operation/blockwise_gemm.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_gemm.hpp rename to composable_kernel/include/tensor_operation/blockwise_gemm.hpp index c045c32c03..f257137f3c 100644 --- a/include/composable_kernel/tensor_operation/blockwise_gemm.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_gemm.hpp @@ -1,8 +1,9 @@ #ifndef CK_BLOCKWISE_GEMM_HPP #define CK_BLOCKWISE_GEMM_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_operation/threadwise_gemm.hpp" +#include "common_header.hpp" +#include "ConstantMatrixDescriptor.hpp" +#include "threadwise_gemm.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp b/composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp rename to composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp index 1381b7ef14..1496630543 100644 --- a/include/composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp @@ -1,7 +1,10 @@ #ifndef CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP #define CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP -#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" +#include "threadwise_generic_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp b/composable_kernel/include/tensor_operation/blockwise_tensor_slice_copy.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp rename to 
composable_kernel/include/tensor_operation/blockwise_tensor_slice_copy.hpp index d4576279a8..2de6cf1d45 100644 --- a/include/composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp +++ b/composable_kernel/include/tensor_operation/blockwise_tensor_slice_copy.hpp @@ -1,7 +1,9 @@ #ifndef CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP #define CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "threadwise_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp b/composable_kernel/include/tensor_operation/threadwise_4d_tensor_op.hpp similarity index 95% rename from include/composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp rename to composable_kernel/include/tensor_operation/threadwise_4d_tensor_op.hpp index 1af6e4dc34..a8d0398de1 100644 --- a/include/composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_4d_tensor_op.hpp @@ -1,7 +1,8 @@ #ifndef CK_THREADWISE_4D_TENSOR_OP_HPP #define CK_THREADWISE_4D_TENSOR_OP_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_direct_convolution.hpp b/composable_kernel/include/tensor_operation/threadwise_direct_convolution.hpp similarity index 98% rename from include/composable_kernel/tensor_operation/threadwise_direct_convolution.hpp rename to composable_kernel/include/tensor_operation/threadwise_direct_convolution.hpp index 0332dd9d3d..3e84cbd8b7 100644 --- a/include/composable_kernel/tensor_operation/threadwise_direct_convolution.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_direct_convolution.hpp @@ -1,8 +1,9 @@ #ifndef 
CK_THREADWISE_DIRECT_CONVOLUTION_HPP #define CK_THREADWISE_DIRECT_CONVOLUTION_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "threadwise_tensor_slice_copy.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_gemm.hpp b/composable_kernel/include/tensor_operation/threadwise_gemm.hpp similarity index 96% rename from include/composable_kernel/tensor_operation/threadwise_gemm.hpp rename to composable_kernel/include/tensor_operation/threadwise_gemm.hpp index 5610b65e09..ea77027c10 100644 --- a/include/composable_kernel/tensor_operation/threadwise_gemm.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_gemm.hpp @@ -1,8 +1,8 @@ #ifndef CK_THREADWISE_GEMM_HPP #define CK_THREADWISE_GEMM_HPP -#include "composable_kernel/utility/common.hpp" -#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantMatrixDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_op.hpp similarity index 73% rename from include/composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp rename to composable_kernel/include/tensor_operation/threadwise_generic_tensor_op.hpp index beca415604..c0b4e89391 100644 --- a/include/composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_op.hpp @@ -1,8 +1,9 @@ #ifndef CK_THREADWISE_GENERIC_TENSOR_OP_HPP #define CK_THREADWISE_GENERIC_TENSOR_OP_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" +#include 
"common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" namespace ck { template diff --git a/include/composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp similarity index 96% rename from include/composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp rename to composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp index 588766b477..2dd7b79ab2 100644 --- a/include/composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp @@ -1,8 +1,9 @@ #ifndef CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP #define CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "ConstantMergedTensorDescriptor.hpp" namespace ck { diff --git a/include/composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp b/composable_kernel/include/tensor_operation/threadwise_tensor_slice_copy.hpp similarity index 99% rename from include/composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp rename to composable_kernel/include/tensor_operation/threadwise_tensor_slice_copy.hpp index d48eed6ad1..8d0ce26f94 100644 --- a/include/composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp +++ b/composable_kernel/include/tensor_operation/threadwise_tensor_slice_copy.hpp @@ -1,7 +1,8 @@ #ifndef CK_THREADWISE_TENSOR_SLICE_COPY_HPP #define CK_THREADWISE_TENSOR_SLICE_COPY_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "common_header.hpp" +#include "ConstantTensorDescriptor.hpp" namespace ck { diff --git 
a/include/composable_kernel/utility/Array.hpp b/composable_kernel/include/utility/Array.hpp similarity index 99% rename from include/composable_kernel/utility/Array.hpp rename to composable_kernel/include/utility/Array.hpp index f67f5a6cde..fcf87c5843 100644 --- a/include/composable_kernel/utility/Array.hpp +++ b/composable_kernel/include/utility/Array.hpp @@ -1,8 +1,8 @@ #ifndef CK_ARRAY_HPP #define CK_ARRAY_HPP -#include "composable_kernel/utility/Sequence.hpp" -#include "composable_kernel/utility/functional2.hpp" +#include "Sequence.hpp" +#include "functional2.hpp" namespace ck { diff --git a/include/composable_kernel/utility/Sequence.hpp b/composable_kernel/include/utility/Sequence.hpp similarity index 99% rename from include/composable_kernel/utility/Sequence.hpp rename to composable_kernel/include/utility/Sequence.hpp index 70c3a5dc39..09d67c13fa 100644 --- a/include/composable_kernel/utility/Sequence.hpp +++ b/composable_kernel/include/utility/Sequence.hpp @@ -1,8 +1,8 @@ #ifndef CK_SEQUENCE_HPP #define CK_SEQUENCE_HPP -#include "composable_kernel/utility/integral_constant.hpp" -#include "composable_kernel/utility/functional.hpp" +#include "integral_constant.hpp" +#include "functional.hpp" namespace ck { diff --git a/include/composable_kernel/utility/amd_inline_asm.hpp b/composable_kernel/include/utility/amd_inline_asm.hpp similarity index 99% rename from include/composable_kernel/utility/amd_inline_asm.hpp rename to composable_kernel/include/utility/amd_inline_asm.hpp index 344bc87063..e82cd62db8 100644 --- a/include/composable_kernel/utility/amd_inline_asm.hpp +++ b/composable_kernel/include/utility/amd_inline_asm.hpp @@ -1,7 +1,7 @@ #ifndef CK_AMD_INLINE_ASM_HPP #define CK_AMD_INLINE_ASM_HPP -#include "composable_kernel/utility/vector_type.hpp" +#include "vector_type.hpp" #define NO_VM_WAIT 0 #define NO_LGKM_WAIT 0 diff --git a/composable_kernel/include/utility/common_header.hpp b/composable_kernel/include/utility/common_header.hpp new file mode 100644 
index 0000000000..1c8dcbd521 --- /dev/null +++ b/composable_kernel/include/utility/common_header.hpp @@ -0,0 +1,18 @@ +#ifndef CK_COMMON_HPP +#define CK_COMMON_HPP + +#include "config.hpp" +#include "utility.hpp" +#include "vector_type.hpp" +#include "integral_constant.hpp" +#include "Sequence.hpp" +#include "Array.hpp" +#include "functional.hpp" +#include "functional2.hpp" +#include "functional3.hpp" + +#if CK_USE_AMD_INLINE_ASM +#include "amd_inline_asm.hpp" +#endif + +#endif diff --git a/include/composable_kernel/utility/config_amd.hpp.in b/composable_kernel/include/utility/config_amd.hpp.in similarity index 94% rename from include/composable_kernel/utility/config_amd.hpp.in rename to composable_kernel/include/utility/config_amd.hpp.in index d6ac44ab24..4d840df7fe 100644 --- a/include/composable_kernel/utility/config_amd.hpp.in +++ b/composable_kernel/include/utility/config_amd.hpp.in @@ -1,5 +1,5 @@ -#ifndef CK_CONFIG_HPP -#define CK_CONFIG_HPP +#ifndef CK_CONFIG_AMD_HPP +#define CK_CONFIG_AMD_HPP #cmakedefine01 CK_DEVICE_BACKEND_AMD diff --git a/include/composable_kernel/utility/config_nvidia.hpp.in b/composable_kernel/include/utility/config_nvidia.hpp.in similarity index 95% rename from include/composable_kernel/utility/config_nvidia.hpp.in rename to composable_kernel/include/utility/config_nvidia.hpp.in index a4d4bcaae7..0f9e3441ea 100644 --- a/include/composable_kernel/utility/config_nvidia.hpp.in +++ b/composable_kernel/include/utility/config_nvidia.hpp.in @@ -1,5 +1,5 @@ -#ifndef CK_CONFIG_CUDA_HPP -#define CK_CONFIG_CUDA_HPP +#ifndef CK_CONFIG_NVIDIA_HPP +#define CK_CONFIG_NVIDIA_HPP #cmakedefine01 CK_DEVICE_BACKEND_NVIDIA diff --git a/include/composable_kernel/utility/functional.hpp b/composable_kernel/include/utility/functional.hpp similarity index 93% rename from include/composable_kernel/utility/functional.hpp rename to composable_kernel/include/utility/functional.hpp index 92102ca418..920f11af2a 100644 --- 
a/include/composable_kernel/utility/functional.hpp +++ b/composable_kernel/include/utility/functional.hpp @@ -1,8 +1,8 @@ #ifndef CK_FUNCTIONAL_HPP #define CK_FUNCTIONAL_HPP -#include "composable_kernel/utility/integral_constant.hpp" -#include "composable_kernel/utility/Sequence.hpp" +#include "integral_constant.hpp" +#include "Sequence.hpp" namespace ck { diff --git a/include/composable_kernel/utility/functional2.hpp b/composable_kernel/include/utility/functional2.hpp similarity index 93% rename from include/composable_kernel/utility/functional2.hpp rename to composable_kernel/include/utility/functional2.hpp index 6d5e38d2ea..3820056593 100644 --- a/include/composable_kernel/utility/functional2.hpp +++ b/composable_kernel/include/utility/functional2.hpp @@ -1,8 +1,8 @@ #ifndef CK_FUNCTIONAL2_HPP #define CK_FUNCTIONAL2_HPP -#include "composable_kernel/utility/functional.hpp" -#include "composable_kernel/utility/Sequence.hpp" +#include "functional.hpp" +#include "Sequence.hpp" namespace ck { diff --git a/include/composable_kernel/utility/functional3.hpp b/composable_kernel/include/utility/functional3.hpp similarity index 93% rename from include/composable_kernel/utility/functional3.hpp rename to composable_kernel/include/utility/functional3.hpp index 769f1bea0b..fc5f8a6bab 100644 --- a/include/composable_kernel/utility/functional3.hpp +++ b/composable_kernel/include/utility/functional3.hpp @@ -1,10 +1,10 @@ #ifndef CK_FUNCTIONAL3_HPP #define CK_FUNCTIONAL3_HPP -#include "composable_kernel/utility/functional.hpp" -#include "composable_kernel/utility/functional2.hpp" -#include "composable_kernel/utility/Sequence.hpp" -#include "composable_kernel/utility/Array.hpp" +#include "functional.hpp" +#include "functional2.hpp" +#include "Sequence.hpp" +#include "Array.hpp" namespace ck { diff --git a/include/composable_kernel/utility/integral_constant.hpp b/composable_kernel/include/utility/integral_constant.hpp similarity index 100% rename from 
include/composable_kernel/utility/integral_constant.hpp rename to composable_kernel/include/utility/integral_constant.hpp diff --git a/include/composable_kernel/utility/utility.hpp b/composable_kernel/include/utility/utility.hpp similarity index 99% rename from include/composable_kernel/utility/utility.hpp rename to composable_kernel/include/utility/utility.hpp index ed834b0df0..9d32c2be55 100644 --- a/include/composable_kernel/utility/utility.hpp +++ b/composable_kernel/include/utility/utility.hpp @@ -1,6 +1,8 @@ #ifndef CK_UTILITY_HPP #define CK_UTILITY_HPP +#include "config.hpp" + namespace ck { __device__ index_t get_thread_local_1d_id() { return threadIdx.x; } diff --git a/include/composable_kernel/utility/vector_type.hpp b/composable_kernel/include/utility/vector_type.hpp similarity index 96% rename from include/composable_kernel/utility/vector_type.hpp rename to composable_kernel/include/utility/vector_type.hpp index 7c48bdb1f8..72c73068e1 100644 --- a/include/composable_kernel/utility/vector_type.hpp +++ b/composable_kernel/include/utility/vector_type.hpp @@ -1,8 +1,8 @@ #ifndef CK_VECTOR_TYPE_HPP #define CK_VECTOR_TYPE_HPP -#include "composable_kernel/utility/config.hpp" -#include "composable_kernel/utility/integral_constant.hpp" +#include "config.hpp" +#include "integral_constant.hpp" namespace ck { diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt index f9528ae211..e5a1b1f732 100644 --- a/driver/CMakeLists.txt +++ b/driver/CMakeLists.txt @@ -1,7 +1,23 @@ +set(TENSOR_SOURCE + src/tensor.cpp; + src/device.cpp; +) + +add_library(tensor SHARED ${TENSOR_SOURCE}) +target_compile_features(tensor PUBLIC) +set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON) + +if(DEVICE_BACKEND STREQUAL "NVIDIA") + target_link_libraries(tensor nvToolsExt cudart) +endif() + +install(TARGETS tensor LIBRARY DESTINATION lib) + + if(DEVICE_BACKEND STREQUAL "AMD") - set(DRIVER_SOURCE driver.cpp) + set(DRIVER_SOURCE src/driver.cpp) elseif(DEVICE_BACKEND 
STREQUAL "NVIDIA") - set(DRIVER_SOURCE driver.cu) + set(DRIVER_SOURCE src/driver.cu) endif() add_executable(driver ${DRIVER_SOURCE}) diff --git a/include/conv_common.hpp b/driver/include/conv_common.hpp similarity index 98% rename from include/conv_common.hpp rename to driver/include/conv_common.hpp index 9f41a1d440..254f4c5651 100644 --- a/include/conv_common.hpp +++ b/driver/include/conv_common.hpp @@ -1,7 +1,7 @@ #ifndef CK_CONV_COMMON_HPP #define CK_CONV_COMMON_HPP -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" +#include "ConstantTensorDescriptor.hpp" using namespace ck; diff --git a/include/device.hpp b/driver/include/device.hpp similarity index 96% rename from include/device.hpp rename to driver/include/device.hpp index 59ab0d0bcd..faa4019a09 100644 --- a/include/device.hpp +++ b/driver/include/device.hpp @@ -2,7 +2,7 @@ #define CK_DEVICE_HPP #include -#include "composable_kernel/utility/config.hpp" +#include "config.hpp" using namespace ck; diff --git a/driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp b/driver/include/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp similarity index 97% rename from driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp rename to driver/include/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp index de1353a4ed..e19051a9bd 100644 --- a/driver/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp +++ b/driver/include/device_convolution_direct_v2_nchw_kcyx_nkhw.hpp @@ -1,8 +1,9 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp" +#include "gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp b/driver/include/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp similarity index 97% rename from 
driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp rename to driver/include/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp index 67d27ec273..de82858288 100644 --- a/driver/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp +++ b/driver/include/device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp @@ -1,11 +1,12 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp b/driver/include/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp similarity index 98% rename from driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp rename to driver/include/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp index 5c2dff2fb4..03cbc204c7 100644 --- a/driver/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp +++ b/driver/include/device_convolution_implicit_gemm_v1_nchw_cyxk_nkhw.hpp @@ -1,9 +1,10 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp" -#include 
"composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp b/driver/include/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp similarity index 98% rename from driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp rename to driver/include/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp index 7575d061c1..a26347d032 100644 --- a/driver/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp +++ b/driver/include/device_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp @@ -1,9 +1,10 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp" +#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp b/driver/include/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp similarity index 96% rename from driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp rename to driver/include/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp index 2ac490e7fa..e4fa7ef0cd 100644 --- a/driver/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp +++ b/driver/include/device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp @@ -1,9 +1,10 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include 
"composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp b/driver/include/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp similarity index 96% rename from driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp rename to driver/include/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp index 92213a3029..80a6155271 100644 --- a/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp +++ b/driver/include/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp @@ -1,9 +1,10 @@ #pragma once #include #include "device.hpp" +#include "tensor.hpp" #include "gridwise_convolution_kernel_wrapper.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp" +#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp" +#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp" using namespace ck; diff --git a/driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp b/driver/include/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp similarity index 98% rename from driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp rename to driver/include/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp index 0911607032..c6be195213 100644 --- a/driver/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp +++ b/driver/include/device_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp @@ -1,7 +1,8 @@ #pragma once #include #include 
"device.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp" +#include "tensor.hpp" +#include "gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp" using namespace ck; diff --git a/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp b/driver/include/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp similarity index 99% rename from driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp rename to driver/include/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp index 98d55e1538..5611e7c612 100644 --- a/driver/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp +++ b/driver/include/device_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp @@ -1,7 +1,8 @@ #pragma once #include #include "device.hpp" -#include "composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp" +#include "tensor.hpp" +#include "gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp" using namespace ck; diff --git a/include/tensor.hpp b/driver/include/tensor.hpp similarity index 100% rename from include/tensor.hpp rename to driver/include/tensor.hpp diff --git a/driver/src/CMakeLists.txt b/driver/src/CMakeLists.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/device.cpp b/driver/src/device.cpp similarity index 98% rename from src/device.cpp rename to driver/src/device.cpp index ca5c22e0ab..76cb19f466 100644 --- a/src/device.cpp +++ b/driver/src/device.cpp @@ -1,4 +1,4 @@ -#include "composable_kernel/utility/config.hpp" +#include "config.hpp" #include "device.hpp" DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size) diff --git a/driver/driver.cpp b/driver/src/driver.cpp similarity index 99% rename from driver/driver.cpp rename to driver/src/driver.cpp index 4c6d9474ff..b930734c00 100644 --- a/driver/driver.cpp +++ b/driver/src/driver.cpp @@ -3,9 +3,9 @@ #include #include #include -#include 
"composable_kernel/utility/config.hpp" -#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp" -#include "tensor.hpp" +#include "config.hpp" +#include "ConstantTensorDescriptor.hpp" +#include "device.hpp" #include "conv_common.hpp" #include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp" #include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp" diff --git a/driver/driver.cu b/driver/src/driver.cu similarity index 100% rename from driver/driver.cu rename to driver/src/driver.cu diff --git a/src/tensor.cpp b/driver/src/tensor.cpp similarity index 100% rename from src/tensor.cpp rename to driver/src/tensor.cpp diff --git a/include/composable_kernel/utility/common.hpp b/include/composable_kernel/utility/common.hpp deleted file mode 100644 index 08167cbccc..0000000000 --- a/include/composable_kernel/utility/common.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef CK_COMMON_HPP -#define CK_COMMON_HPP - -#include "composable_kernel/utility/utility.hpp" -#include "composable_kernel/utility/vector_type.hpp" -#include "composable_kernel/utility/integral_constant.hpp" -#include "composable_kernel/utility/Sequence.hpp" -#include "composable_kernel/utility/Array.hpp" -#include "composable_kernel/utility/functional.hpp" -#include "composable_kernel/utility/functional2.hpp" -#include "composable_kernel/utility/functional3.hpp" - -#if CK_USE_AMD_INLINE_ASM -#include "composable_kernel/utility/amd_inline_asm.hpp" -#endif - -#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index ef8a781dd5..0000000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -if(DEVICE_BACKEND STREQUAL "AMD") - configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config_amd.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp") -elseif(DEVICE_BACKEND STREQUAL "NVIDIA") - configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config_nvidia.hpp.in" 
"${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp") -endif() - -set(TENSOR_SOURCE - tensor.cpp; - device.cpp; -) - -add_library(tensor SHARED ${TENSOR_SOURCE}) -target_compile_features(tensor PUBLIC) -set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON) - -if(DEVICE_BACKEND STREQUAL "NVIDIA") - target_link_libraries(tensor nvToolsExt cudart) -endif() - -install(TARGETS tensor LIBRARY DESTINATION lib)