mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
@@ -46,8 +46,19 @@ endif()
|
||||
|
||||
#
|
||||
include_directories(BEFORE
|
||||
include
|
||||
${PROJECT_BINARY_DIR}/include
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation
|
||||
${PROJECT_SOURCE_DIR}/composable_kernel/include/kernel_algorithm
|
||||
${PROJECT_SOURCE_DIR}/driver/include
|
||||
${PROJECT_BINARY_DIR}/composable_kernel/include/utility
|
||||
)
|
||||
add_subdirectory(src)
|
||||
|
||||
if(DEVICE_BACKEND STREQUAL "AMD")
|
||||
configure_file("${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/config_amd.hpp.in" "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/config.hpp")
|
||||
elseif(DEVICE_BACKEND STREQUAL "NVIDIA")
|
||||
configure_file("${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/config_nvidia.hpp.in" "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/config.hpp")
|
||||
endif()
|
||||
|
||||
add_subdirectory(driver)
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
|
||||
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_direct_convolution.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_3d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_generic_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_generic_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "threadwise_generic_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "threadwise_generic_tensor_slice_copy.hpp"
|
||||
|
||||
#ifndef CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM
|
||||
#define CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM 1
|
||||
#endif
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -233,10 +237,10 @@ struct GridwiseConvolutionImplicitGemm_v4_nchw_kcyx_nkhw_lds_double_buffer
|
||||
|
||||
// choose GEMM implementation here
|
||||
const auto run_blockwise_gemm = [&](auto... Xs) {
|
||||
#if 1
|
||||
return blockwise_gemm.Run(Xs...);
|
||||
#else
|
||||
#if CK_USE_AMD_INLINE_ASM && CK_BLOCKWISE_GEMM_USE_AMD_INLINE_ASM
|
||||
return blockwise_gemm.Run_asm(Xs...);
|
||||
#else
|
||||
return blockwise_gemm.Run(Xs...);
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_direct_convolution.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_direct_convolution.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_direct_convolution.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
|
||||
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "common_header.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
|
||||
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
|
||||
#define CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "common_header.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_BLOCKWISE_2D_TENSOR_OP_HPP
|
||||
#define CK_BLOCKWISE_2D_TENSOR_OP_HPP
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_BLOCKWISE_3D_TENSOR_OP_HPP
|
||||
#define CK_BLOCKWISE_3D_TENSOR_OP_HPP
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#ifndef CK_BLOCKWISE_4D_TENSOR_OP_HPP
|
||||
#define CK_BLOCKWISE_4D_TENSOR_OP_HPP
|
||||
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#ifndef CK_BLOCKWISE_BATCHED_GEMM_HPP
|
||||
#define CK_BLOCKWISE_BATCHED_GEMM_HPP
|
||||
|
||||
#include "composable_kernel/tensor_operation/threadwise_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "threadwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#ifndef CK_BLOCKWISE_GEMM_HPP
|
||||
#define CK_BLOCKWISE_GEMM_HPP
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_gemm.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "threadwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
#ifndef CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP
|
||||
#define CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP
|
||||
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "threadwise_generic_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#ifndef CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP
|
||||
#define CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP
|
||||
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#ifndef CK_THREADWISE_4D_TENSOR_OP_HPP
|
||||
#define CK_THREADWISE_4D_TENSOR_OP_HPP
|
||||
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#ifndef CK_THREADWISE_DIRECT_CONVOLUTION_HPP
|
||||
#define CK_THREADWISE_DIRECT_CONVOLUTION_HPP
|
||||
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_THREADWISE_GEMM_HPP
|
||||
#define CK_THREADWISE_GEMM_HPP
|
||||
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#ifndef CK_THREADWISE_GENERIC_TENSOR_OP_HPP
|
||||
#define CK_THREADWISE_GENERIC_TENSOR_OP_HPP
|
||||
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
template <class Float, class TDesc>
|
||||
@@ -1,8 +1,9 @@
|
||||
#ifndef CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP
|
||||
#define CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP
|
||||
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#ifndef CK_THREADWISE_TENSOR_SLICE_COPY_HPP
|
||||
#define CK_THREADWISE_TENSOR_SLICE_COPY_HPP
|
||||
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "common_header.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_ARRAY_HPP
|
||||
#define CK_ARRAY_HPP
|
||||
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
#include "composable_kernel/utility/functional2.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "functional2.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_SEQUENCE_HPP
|
||||
#define CK_SEQUENCE_HPP
|
||||
|
||||
#include "composable_kernel/utility/integral_constant.hpp"
|
||||
#include "composable_kernel/utility/functional.hpp"
|
||||
#include "integral_constant.hpp"
|
||||
#include "functional.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_AMD_INLINE_ASM_HPP
|
||||
#define CK_AMD_INLINE_ASM_HPP
|
||||
|
||||
#include "composable_kernel/utility/vector_type.hpp"
|
||||
#include "vector_type.hpp"
|
||||
|
||||
#define NO_VM_WAIT 0
|
||||
#define NO_LGKM_WAIT 0
|
||||
18
composable_kernel/include/utility/common_header.hpp
Normal file
18
composable_kernel/include/utility/common_header.hpp
Normal file
@@ -0,0 +1,18 @@
|
||||
#ifndef CK_COMMON_HPP
|
||||
#define CK_COMMON_HPP
|
||||
|
||||
#include "config.hpp"
|
||||
#include "utility.hpp"
|
||||
#include "vector_type.hpp"
|
||||
#include "integral_constant.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "Array.hpp"
|
||||
#include "functional.hpp"
|
||||
#include "functional2.hpp"
|
||||
#include "functional3.hpp"
|
||||
|
||||
#if CK_USE_AMD_INLINE_ASM
|
||||
#include "amd_inline_asm.hpp"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef CK_CONFIG_HPP
|
||||
#define CK_CONFIG_HPP
|
||||
#ifndef CK_CONFIG_AMD_HPP
|
||||
#define CK_CONFIG_AMD_HPP
|
||||
|
||||
#cmakedefine01 CK_DEVICE_BACKEND_AMD
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef CK_CONFIG_CUDA_HPP
|
||||
#define CK_CONFIG_CUDA_HPP
|
||||
#ifndef CK_CONFIG_NVIDIA_HPP
|
||||
#define CK_CONFIG_NVIDIA_HPP
|
||||
|
||||
#cmakedefine01 CK_DEVICE_BACKEND_NVIDIA
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_FUNCTIONAL_HPP
|
||||
#define CK_FUNCTIONAL_HPP
|
||||
|
||||
#include "composable_kernel/utility/integral_constant.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
#include "integral_constant.hpp"
|
||||
#include "Sequence.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_FUNCTIONAL2_HPP
|
||||
#define CK_FUNCTIONAL2_HPP
|
||||
|
||||
#include "composable_kernel/utility/functional.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
#include "functional.hpp"
|
||||
#include "Sequence.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
#ifndef CK_FUNCTIONAL3_HPP
|
||||
#define CK_FUNCTIONAL3_HPP
|
||||
|
||||
#include "composable_kernel/utility/functional.hpp"
|
||||
#include "composable_kernel/utility/functional2.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
#include "composable_kernel/utility/Array.hpp"
|
||||
#include "functional.hpp"
|
||||
#include "functional2.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "Array.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#ifndef CK_UTILITY_HPP
|
||||
#define CK_UTILITY_HPP
|
||||
|
||||
#include "config.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
__device__ index_t get_thread_local_1d_id() { return threadIdx.x; }
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_VECTOR_TYPE_HPP
|
||||
#define CK_VECTOR_TYPE_HPP
|
||||
|
||||
#include "composable_kernel/utility/config.hpp"
|
||||
#include "composable_kernel/utility/integral_constant.hpp"
|
||||
#include "config.hpp"
|
||||
#include "integral_constant.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,23 @@
|
||||
set(TENSOR_SOURCE
|
||||
src/tensor.cpp;
|
||||
src/device.cpp;
|
||||
)
|
||||
|
||||
add_library(tensor SHARED ${TENSOR_SOURCE})
|
||||
target_compile_features(tensor PUBLIC)
|
||||
set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(DEVICE_BACKEND STREQUAL "NVIDIA")
|
||||
target_link_libraries(tensor nvToolsExt cudart)
|
||||
endif()
|
||||
|
||||
install(TARGETS tensor LIBRARY DESTINATION lib)
|
||||
|
||||
|
||||
if(DEVICE_BACKEND STREQUAL "AMD")
|
||||
set(DRIVER_SOURCE driver.cpp)
|
||||
set(DRIVER_SOURCE src/driver.cpp)
|
||||
elseif(DEVICE_BACKEND STREQUAL "NVIDIA")
|
||||
set(DRIVER_SOURCE driver.cu)
|
||||
set(DRIVER_SOURCE src/driver.cu)
|
||||
endif()
|
||||
|
||||
add_executable(driver ${DRIVER_SOURCE})
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_CONV_COMMON_HPP
|
||||
#define CK_CONV_COMMON_HPP
|
||||
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#define CK_DEVICE_HPP
|
||||
|
||||
#include <memory>
|
||||
#include "composable_kernel/utility/config.hpp"
|
||||
#include "config.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
|
||||
#include "gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
0
driver/src/CMakeLists.txt
Normal file
0
driver/src/CMakeLists.txt
Normal file
@@ -1,4 +1,4 @@
|
||||
#include "composable_kernel/utility/config.hpp"
|
||||
#include "config.hpp"
|
||||
#include "device.hpp"
|
||||
|
||||
DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
|
||||
@@ -3,9 +3,9 @@
|
||||
#include <initializer_list>
|
||||
#include <cstdlib>
|
||||
#include <stdlib.h>
|
||||
#include "composable_kernel/utility/config.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "config.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "device.hpp"
|
||||
#include "conv_common.hpp"
|
||||
#include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
|
||||
#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp"
|
||||
@@ -1,17 +0,0 @@
|
||||
#ifndef CK_COMMON_HPP
|
||||
#define CK_COMMON_HPP
|
||||
|
||||
#include "composable_kernel/utility/utility.hpp"
|
||||
#include "composable_kernel/utility/vector_type.hpp"
|
||||
#include "composable_kernel/utility/integral_constant.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
#include "composable_kernel/utility/Array.hpp"
|
||||
#include "composable_kernel/utility/functional.hpp"
|
||||
#include "composable_kernel/utility/functional2.hpp"
|
||||
#include "composable_kernel/utility/functional3.hpp"
|
||||
|
||||
#if CK_USE_AMD_INLINE_ASM
|
||||
#include "composable_kernel/utility/amd_inline_asm.hpp"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,20 +0,0 @@
|
||||
if(DEVICE_BACKEND STREQUAL "AMD")
|
||||
configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config_amd.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp")
|
||||
elseif(DEVICE_BACKEND STREQUAL "NVIDIA")
|
||||
configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config_nvidia.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp")
|
||||
endif()
|
||||
|
||||
set(TENSOR_SOURCE
|
||||
tensor.cpp;
|
||||
device.cpp;
|
||||
)
|
||||
|
||||
add_library(tensor SHARED ${TENSOR_SOURCE})
|
||||
target_compile_features(tensor PUBLIC)
|
||||
set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(DEVICE_BACKEND STREQUAL "NVIDIA")
|
||||
target_link_libraries(tensor nvToolsExt cudart)
|
||||
endif()
|
||||
|
||||
install(TARGETS tensor LIBRARY DESTINATION lib)
|
||||
Reference in New Issue
Block a user