reorganized files

[ROCm/composable_kernel commit: 1566b31736]
This commit is contained in:
Chao Liu
2019-06-13 15:12:12 -05:00
parent 11c6b2ab9a
commit 5f217ebda5
64 changed files with 254 additions and 218 deletions

View File

@@ -46,8 +46,19 @@ endif()
#
# Prepend the project's header directories to the directory-scoped include
# search path, so headers can be included by bare file name
# (e.g. "common_header.hpp") instead of by a path relative to the repo root.
# BEFORE places these entries ahead of any include directories already set.
include_directories(BEFORE
include
${PROJECT_BINARY_DIR}/include
${PROJECT_SOURCE_DIR}/composable_kernel/include
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation
${PROJECT_SOURCE_DIR}/composable_kernel/include/kernel_algorithm
${PROJECT_SOURCE_DIR}/driver/include
# Build-tree directory that receives the generated config.hpp (see the
# configure_file() calls that follow).
${PROJECT_BINARY_DIR}/composable_kernel/include/utility
)
add_subdirectory(src)
# Generate config.hpp in the build tree from the template that matches the
# selected DEVICE_BACKEND ("AMD" or "NVIDIA"). Both branches write to the same
# output path, so sources include "config.hpp" regardless of backend.
# NOTE(review): there is no else() branch here; if DEVICE_BACKEND holds any
# other value, no config.hpp is generated by this block. Confirm the value is
# validated earlier in the file.
if(DEVICE_BACKEND STREQUAL "AMD")
configure_file("${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/config_amd.hpp.in" "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/config.hpp")
elseif(DEVICE_BACKEND STREQUAL "NVIDIA")
configure_file("${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/config_nvidia.hpp.in" "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/config.hpp")
endif()
add_subdirectory(driver)

View File

@@ -1,12 +1,12 @@
#ifndef CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_direct_convolution.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,15 +1,15 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_3d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,12 +1,12 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,13 +1,13 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,12 +1,12 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,12 +1,12 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,13 +1,13 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,13 +1,17 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
#ifndef CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM
#define CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM 1
#endif
namespace ck {
@@ -233,10 +237,10 @@ struct GridwiseConvolutionImplicitGemm_v4_nchw_kcyx_nkhw_lds_double_buffer
// choose GEMM implementation here
const auto run_blockwise_gemm = [&](auto... Xs) {
#if 1
return blockwise_gemm.Run(Xs...);
#else
#if CK_USE_AMD_INLINE_ASM && CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM
return blockwise_gemm.Run_asm(Xs...);
#else
return blockwise_gemm.Run(Xs...);
#endif
};

View File

@@ -1,11 +1,11 @@
#pragma once
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_direct_convolution.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_direct_convolution.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "threadwise_direct_convolution.hpp"
namespace ck {

View File

@@ -1,11 +1,11 @@
#pragma once
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#include "composable_kernel/utility/common.hpp"
#include "common_header.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
#include "composable_kernel/utility/common.hpp"
#include "common_header.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_BLOCKWISE_2D_TENSOR_OP_HPP
#define CK_BLOCKWISE_2D_TENSOR_OP_HPP
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_BLOCKWISE_3D_TENSOR_OP_HPP
#define CK_BLOCKWISE_3D_TENSOR_OP_HPP
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,9 @@
#ifndef CK_BLOCKWISE_4D_TENSOR_OP_HPP
#define CK_BLOCKWISE_4D_TENSOR_OP_HPP
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "threadwise_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,7 +1,9 @@
#ifndef CK_BLOCKWISE_BATCHED_GEMM_HPP
#define CK_BLOCKWISE_BATCHED_GEMM_HPP
#include "composable_kernel/tensor_operation/threadwise_gemm.hpp"
#include "common_header.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "threadwise_gemm.hpp"
namespace ck {

View File

@@ -1,8 +1,9 @@
#ifndef CK_BLOCKWISE_GEMM_HPP
#define CK_BLOCKWISE_GEMM_HPP
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_operation/threadwise_gemm.hpp"
#include "common_header.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "threadwise_gemm.hpp"
namespace ck {

View File

@@ -1,7 +1,10 @@
#ifndef CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP
#define CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,7 +1,9 @@
#ifndef CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP
#define CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "threadwise_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,7 +1,8 @@
#ifndef CK_THREADWISE_4D_TENSOR_OP_HPP
#define CK_THREADWISE_4D_TENSOR_OP_HPP
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,9 @@
#ifndef CK_THREADWISE_DIRECT_CONVOLUTION_HPP
#define CK_THREADWISE_DIRECT_CONVOLUTION_HPP
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "threadwise_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_THREADWISE_GEMM_HPP
#define CK_THREADWISE_GEMM_HPP
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "common_header.hpp"
#include "ConstantMatrixDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,9 @@
#ifndef CK_THREADWISE_GENERIC_TENSOR_OP_HPP
#define CK_THREADWISE_GENERIC_TENSOR_OP_HPP
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
namespace ck {
template <class Float, class TDesc>

View File

@@ -1,8 +1,9 @@
#ifndef CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP
#define CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,7 +1,8 @@
#ifndef CK_THREADWISE_TENSOR_SLICE_COPY_HPP
#define CK_THREADWISE_TENSOR_SLICE_COPY_HPP
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "common_header.hpp"
#include "ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_ARRAY_HPP
#define CK_ARRAY_HPP
#include "composable_kernel/utility/Sequence.hpp"
#include "composable_kernel/utility/functional2.hpp"
#include "Sequence.hpp"
#include "functional2.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_SEQUENCE_HPP
#define CK_SEQUENCE_HPP
#include "composable_kernel/utility/integral_constant.hpp"
#include "composable_kernel/utility/functional.hpp"
#include "integral_constant.hpp"
#include "functional.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP
#include "composable_kernel/utility/vector_type.hpp"
#include "vector_type.hpp"
#define NO_VM_WAIT 0
#define NO_LGKM_WAIT 0

View File

@@ -0,0 +1,18 @@
#ifndef CK_COMMON_HPP
#define CK_COMMON_HPP
#include "config.hpp"
#include "utility.hpp"
#include "vector_type.hpp"
#include "integral_constant.hpp"
#include "Sequence.hpp"
#include "Array.hpp"
#include "functional.hpp"
#include "functional2.hpp"
#include "functional3.hpp"
#if CK_USE_AMD_INLINE_ASM
#include "amd_inline_asm.hpp"
#endif
#endif

View File

@@ -1,5 +1,5 @@
#ifndef CK_CONFIG_HPP
#define CK_CONFIG_HPP
#ifndef CK_CONFIG_AMD_HPP
#define CK_CONFIG_AMD_HPP
#cmakedefine01 CK_DEVICE_BACKEND_AMD

View File

@@ -1,5 +1,5 @@
#ifndef CK_CONFIG_CUDA_HPP
#define CK_CONFIG_CUDA_HPP
#ifndef CK_CONFIG_NVIDIA_HPP
#define CK_CONFIG_NVIDIA_HPP
#cmakedefine01 CK_DEVICE_BACKEND_NVIDIA

View File

@@ -1,8 +1,8 @@
#ifndef CK_FUNCTIONAL_HPP
#define CK_FUNCTIONAL_HPP
#include "composable_kernel/utility/integral_constant.hpp"
#include "composable_kernel/utility/Sequence.hpp"
#include "integral_constant.hpp"
#include "Sequence.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_FUNCTIONAL2_HPP
#define CK_FUNCTIONAL2_HPP
#include "composable_kernel/utility/functional.hpp"
#include "composable_kernel/utility/Sequence.hpp"
#include "functional.hpp"
#include "Sequence.hpp"
namespace ck {

View File

@@ -1,10 +1,10 @@
#ifndef CK_FUNCTIONAL3_HPP
#define CK_FUNCTIONAL3_HPP
#include "composable_kernel/utility/functional.hpp"
#include "composable_kernel/utility/functional2.hpp"
#include "composable_kernel/utility/Sequence.hpp"
#include "composable_kernel/utility/Array.hpp"
#include "functional.hpp"
#include "functional2.hpp"
#include "Sequence.hpp"
#include "Array.hpp"
namespace ck {

View File

@@ -1,6 +1,8 @@
#ifndef CK_UTILITY_HPP
#define CK_UTILITY_HPP
#include "config.hpp"
namespace ck {
// Returns the calling thread's threadIdx.x as its 1-D thread-local id.
__device__ index_t get_thread_local_1d_id() { return threadIdx.x; }

View File

@@ -1,8 +1,8 @@
#ifndef CK_VECTOR_TYPE_HPP
#define CK_VECTOR_TYPE_HPP
#include "composable_kernel/utility/config.hpp"
#include "composable_kernel/utility/integral_constant.hpp"
#include "config.hpp"
#include "integral_constant.hpp"
namespace ck {

View File

@@ -1,7 +1,23 @@
set(TENSOR_SOURCE
src/tensor.cpp;
src/device.cpp;
)
add_library(tensor SHARED ${TENSOR_SOURCE})
target_compile_features(tensor PUBLIC)
set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
if(DEVICE_BACKEND STREQUAL "NVIDIA")
target_link_libraries(tensor nvToolsExt cudart)
endif()
install(TARGETS tensor LIBRARY DESTINATION lib)
if(DEVICE_BACKEND STREQUAL "AMD")
set(DRIVER_SOURCE driver.cpp)
set(DRIVER_SOURCE src/driver.cpp)
elseif(DEVICE_BACKEND STREQUAL "NVIDIA")
set(DRIVER_SOURCE driver.cu)
set(DRIVER_SOURCE src/driver.cu)
endif()
add_executable(driver ${DRIVER_SOURCE})

View File

@@ -1,7 +1,7 @@
#ifndef CK_CONV_COMMON_HPP
#define CK_CONV_COMMON_HPP
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "ConstantTensorDescriptor.hpp"
using namespace ck;

View File

@@ -2,7 +2,7 @@
#define CK_DEVICE_HPP
#include <memory>
#include "composable_kernel/utility/config.hpp"
#include "config.hpp"
using namespace ck;

View File

@@ -1,8 +1,9 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "tensor.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
#include "gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
using namespace ck;

View File

@@ -1,11 +1,12 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "tensor.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp"
#include "gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp"
#include "gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp"
#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp"
#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -1,9 +1,10 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "tensor.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp"
#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -1,9 +1,10 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "tensor.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp"
#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -1,9 +1,10 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "tensor.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -1,9 +1,10 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "tensor.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp"
#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp"
#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -1,7 +1,8 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
#include "tensor.hpp"
#include "gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
using namespace ck;

View File

@@ -1,7 +1,8 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
#include "tensor.hpp"
#include "gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
using namespace ck;

View File

View File

@@ -1,4 +1,4 @@
#include "composable_kernel/utility/config.hpp"
#include "config.hpp"
#include "device.hpp"
DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)

View File

@@ -3,9 +3,9 @@
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include "composable_kernel/utility/config.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "tensor.hpp"
#include "config.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "device.hpp"
#include "conv_common.hpp"
#include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp"

View File

@@ -1,17 +0,0 @@
#ifndef CK_COMMON_HPP
#define CK_COMMON_HPP
#include "composable_kernel/utility/utility.hpp"
#include "composable_kernel/utility/vector_type.hpp"
#include "composable_kernel/utility/integral_constant.hpp"
#include "composable_kernel/utility/Sequence.hpp"
#include "composable_kernel/utility/Array.hpp"
#include "composable_kernel/utility/functional.hpp"
#include "composable_kernel/utility/functional2.hpp"
#include "composable_kernel/utility/functional3.hpp"
#if CK_USE_AMD_INLINE_ASM
#include "composable_kernel/utility/amd_inline_asm.hpp"
#endif
#endif

View File

@@ -1,20 +0,0 @@
if(DEVICE_BACKEND STREQUAL "AMD")
configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config_amd.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp")
elseif(DEVICE_BACKEND STREQUAL "NVIDIA")
configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config_nvidia.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp")
endif()
set(TENSOR_SOURCE
tensor.cpp;
device.cpp;
)
add_library(tensor SHARED ${TENSOR_SOURCE})
target_compile_features(tensor PUBLIC)
set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
if(DEVICE_BACKEND STREQUAL "NVIDIA")
target_link_libraries(tensor nvToolsExt cudart)
endif()
install(TARGETS tensor LIBRARY DESTINATION lib)