reorganize files

[ROCm/composable_kernel commit: 81497a93a0]
This commit is contained in:
Chao Liu
2019-06-11 23:49:51 -05:00
parent 3dc2ff612c
commit 14451df1fd
59 changed files with 197 additions and 198 deletions

View File

@@ -45,6 +45,9 @@ elseif(DEVICE_BACKEND STREQUAL "CUDA")
endif()
#
include_directories(BEFORE src/include ${PROJECT_BINARY_DIR}/src/include)
include_directories(BEFORE
include
${PROJECT_BINARY_DIR}/include
)
add_subdirectory(src)
add_subdirectory(driver)

View File

@@ -2,7 +2,7 @@
#include <unistd.h>
#include "device.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
using namespace ck;

View File

@@ -2,10 +2,10 @@
#include <unistd.h>
#include "device.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp"
#include "gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp"
#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp"
#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -2,8 +2,8 @@
#include <unistd.h>
#include "device.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp"
#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -2,8 +2,8 @@
#include <unistd.h>
#include "device.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -2,8 +2,8 @@
#include <unistd.h>
#include "device.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -2,8 +2,8 @@
#include <unistd.h>
#include "device.hpp"
#include "gridwise_convolution_kernel_wrapper.hpp"
#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp"
#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp"
using namespace ck;

View File

@@ -1,7 +1,7 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
using namespace ck;

View File

@@ -1,7 +1,7 @@
#pragma once
#include <unistd.h>
#include "device.hpp"
#include "gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
#include "composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
using namespace ck;

View File

@@ -3,9 +3,9 @@
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include "config.hpp"
#include "composable_kernel/utility/config.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "tensor.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "conv_common.hpp"
#include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp"

View File

@@ -1,12 +1,12 @@
#ifndef CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_direct_convolution.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,15 +1,15 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_3d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,14 +1,14 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_copy.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "threadwise_generic_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
namespace ck {

View File

@@ -1,12 +1,12 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,13 +1,13 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,12 +1,12 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,12 +1,12 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,13 +1,13 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,13 +1,13 @@
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_generic_tensor_slice_copy.hpp"
#include "blockwise_gemm.hpp"
#include "threadwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,11 +1,11 @@
#pragma once
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_direct_convolution.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "threadwise_direct_convolution.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_direct_convolution.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp"
namespace ck {

View File

@@ -1,11 +1,11 @@
#pragma once
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
#include "common.hpp"
#include "composable_kernel/utility/common.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
#define CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
#include "common.hpp"
#include "composable_kernel/utility/common.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_BLOCKWISE_2D_TENSOR_OP_HPP
#define CK_BLOCKWISE_2D_TENSOR_OP_HPP
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_BLOCKWISE_3D_TENSOR_OP_HPP
#define CK_BLOCKWISE_3D_TENSOR_OP_HPP
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_BLOCKWISE_4D_TENSOR_OP_HPP
#define CK_BLOCKWISE_4D_TENSOR_OP_HPP
#include "ConstantTensorDescriptor.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_BLOCKWISE_BATCHED_GEMM_HPP
#define CK_BLOCKWISE_BATCHED_GEMM_HPP
#include "threadwise_gemm.hpp"
#include "composable_kernel/tensor_operation/threadwise_gemm.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_BLOCKWISE_GEMM_HPP
#define CK_BLOCKWISE_GEMM_HPP
#include "common.hpp"
#include "threadwise_gemm.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_operation/threadwise_gemm.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP
#define CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP
#include "threadwise_generic_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP
#define CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP
#include "threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_THREADWISE_4D_TENSOR_OP_HPP
#define CK_THREADWISE_4D_TENSOR_OP_HPP
#include "ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_THREADWISE_DIRECT_CONVOLUTION_HPP
#define CK_THREADWISE_DIRECT_CONVOLUTION_HPP
#include "ConstantTensorDescriptor.hpp"
#include "threadwise_tensor_slice_copy.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_THREADWISE_GEMM_HPP
#define CK_THREADWISE_GEMM_HPP
#include "common.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "composable_kernel/utility/common.hpp"
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_THREADWISE_GENERIC_TENSOR_OP_HPP
#define CK_THREADWISE_GENERIC_TENSOR_OP_HPP
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
namespace ck {
template <class Float, class TDesc>

View File

@@ -1,8 +1,8 @@
#ifndef CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP
#define CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_THREADWISE_TENSOR_SLICE_COPY_HPP
#define CK_THREADWISE_TENSOR_SLICE_COPY_HPP
#include "ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_ARRAY_HPP
#define CK_ARRAY_HPP
#include "Sequence.hpp"
#include "functional2.hpp"
#include "composable_kernel/utility/Sequence.hpp"
#include "composable_kernel/utility/functional2.hpp"
namespace ck {

View File

@@ -1,8 +1,8 @@
#ifndef CK_SEQUENCE_HPP
#define CK_SEQUENCE_HPP
#include "integral_constant.hpp"
#include "functional.hpp"
#include "composable_kernel/utility/integral_constant.hpp"
#include "composable_kernel/utility/functional.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_AMD_INLINE_ASM_HPP
#define CK_AMD_INLINE_ASM_HPP
#include "common.hpp"
#include "composable_kernel/utility/vector_type.hpp"
#define NO_VM_WAIT 0
#define NO_LGKM_WAIT 0

View File

@@ -0,0 +1,17 @@
#ifndef CK_COMMON_HPP
#define CK_COMMON_HPP
#include "composable_kernel/utility/utility.hpp"
#include "composable_kernel/utility/vector_type.hpp"
#include "composable_kernel/utility/integral_constant.hpp"
#include "composable_kernel/utility/Sequence.hpp"
#include "composable_kernel/utility/Array.hpp"
#include "composable_kernel/utility/functional.hpp"
#include "composable_kernel/utility/functional2.hpp"
#include "composable_kernel/utility/functional3.hpp"
#if CK_USE_AMD_INLINE_ASM
#include "composable_kernel/utility/amd_inline_asm.hpp"
#endif
#endif

View File

@@ -1,8 +1,8 @@
#ifndef CK_FUNCTIONAL_HPP
#define CK_FUNCTIONAL_HPP
#include "integral_constant.hpp"
#include "Sequence.hpp"
#include "composable_kernel/utility/integral_constant.hpp"
#include "composable_kernel/utility/Sequence.hpp"
namespace ck {
@@ -38,7 +38,7 @@ struct static_if<true>
__host__ __device__ constexpr auto operator()(F f) const
{
// This is a trick for compiler:
// Pass forwarder to lambda "f" as "auto" argument, and maks sure "f" will use it,
// Pass forwarder to lambda "f" as "auto" argument, and make sure "f" will use it,
// this will make "f" a generic lambda, so that "f" won't be compiled until being
// instantiated here
f(forwarder{});
@@ -67,7 +67,7 @@ struct static_if<false>
__host__ __device__ static constexpr auto Else(F f)
{
// This is a trick for compiler:
// Pass forwarder to lambda "f" as "auto" argument, and maks sure "f" will use it,
// Pass forwarder to lambda "f" as "auto" argument, and make sure "f" will use it,
// this will make "f" a generic lambda, so that "f" won't be compiled until being
// instantiated here
f(forwarder{});

View File

@@ -1,8 +1,8 @@
#ifndef CK_FUNCTIONAL2_HPP
#define CK_FUNCTIONAL2_HPP
#include "functional.hpp"
#include "Sequence.hpp"
#include "composable_kernel/utility/functional.hpp"
#include "composable_kernel/utility/Sequence.hpp"
namespace ck {

View File

@@ -1,10 +1,10 @@
#ifndef CK_FUNCTIONAL3_HPP
#define CK_FUNCTIONAL3_HPP
#include "functional.hpp"
#include "functional2.hpp"
#include "Sequence.hpp"
#include "Array.hpp"
#include "composable_kernel/utility/functional.hpp"
#include "composable_kernel/utility/functional2.hpp"
#include "composable_kernel/utility/Sequence.hpp"
#include "composable_kernel/utility/Array.hpp"
namespace ck {

View File

@@ -1,5 +1,5 @@
#ifndef CK_BASE_HPP
#define CK_BASE_HPP
#ifndef CK_UTILITY_HPP
#define CK_UTILITY_HPP
namespace ck {
@@ -25,7 +25,8 @@ __host__ __device__ constexpr bool is_same_type(X, Y)
return is_same<X, Y>::value;
}
namespace math { // namespace math
namespace math {
template <class T, T s>
struct scales
{
@@ -106,7 +107,7 @@ __host__ __device__ constexpr T min(T x, Ts... xs)
}
// this is wrong
// TODO: implement correct least common multiple, instead of calling max()
// TODO: implement least common multiple properly, instead of calling max()
template <class T, class... Ts>
__host__ __device__ constexpr T lcm(T x, Ts... xs)
{

View File

@@ -1,8 +1,8 @@
#ifndef CK_VECTOR_TYPE_HPP
#define CK_VECTOR_TYPE_HPP
#include "config.hpp"
#include "integral_constant.hpp"
#include "composable_kernel/utility/config.hpp"
#include "composable_kernel/utility/integral_constant.hpp"
namespace ck {

View File

@@ -1,7 +1,7 @@
#ifndef CK_CONV_COMMON_HPP
#define CK_CONV_COMMON_HPP
#include "ConstantTensorDescriptor.hpp"
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
using namespace ck;

View File

@@ -2,7 +2,7 @@
#define CK_DEVICE_HPP
#include <memory>
#include "config.hpp"
#include "composable_kernel/utility/config.hpp"
using namespace ck;

View File

@@ -1,8 +1,6 @@
#ifndef CK_GRIDWISE_CONVOLUTION_KERNEL_WRAPPER
#define CK_GRIDWISE_CONVOLUTION_KERNEL_WRAPPER
namespace ck {
template <class GridwiseConvolution, class T>
__global__ void run_gridwise_convolution_kernel(const T* const __restrict__ p_in_global,
const T* const __restrict__ p_wei_global,
@@ -11,6 +9,4 @@ __global__ void run_gridwise_convolution_kernel(const T* const __restrict__ p_in
GridwiseConvolution{}.Run(p_in_global, p_wei_global, p_out_global);
}
} // namespace ck
#endif

View File

@@ -1,4 +1,4 @@
configure_file("${PROJECT_SOURCE_DIR}/src/include/config.hpp.in" "${PROJECT_BINARY_DIR}/src/include/config.hpp")
configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp")
set(TENSOR_SOURCE
tensor.cpp;
@@ -9,7 +9,6 @@ add_library(tensor SHARED ${TENSOR_SOURCE})
target_compile_features(tensor PUBLIC)
set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
if(DEVICE_BACKEND STREQUAL "CUDA")
target_link_libraries(tensor nvToolsExt cudart)
endif()

View File

@@ -1,4 +1,4 @@
#include "config.hpp"
#include "composable_kernel/utility/config.hpp"
#include "device.hpp"
DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)

View File

@@ -1,17 +0,0 @@
#ifndef CK_COMMON_HPP
#define CK_COMMON_HPP
#include "utility.hpp"
#include "vector_type.hpp"
#include "integral_constant.hpp"
#include "Sequence.hpp"
#include "Array.hpp"
#include "functional.hpp"
#include "functional2.hpp"
#include "functional3.hpp"
#if CK_USE_AMD_INLINE_ASM
#include "amd_inline_asm.hpp"
#endif
#endif