mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
@@ -45,6 +45,9 @@ elseif(DEVICE_BACKEND STREQUAL "CUDA")
|
||||
endif()
|
||||
|
||||
#
|
||||
include_directories(BEFORE src/include ${PROJECT_BINARY_DIR}/src/include)
|
||||
include_directories(BEFORE
|
||||
include
|
||||
${PROJECT_BINARY_DIR}/include
|
||||
)
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(driver)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r1_chwn_cyxk_khwn.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r2_chwn_cyxk_khwn.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v2_chwn_cyxk_khwn_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "gridwise_convolution_kernel_wrapper.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp"
|
||||
#include "gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_convolution_implicit_gemm_v4_nchw_kcyx_nkhw_lds_double_buffer.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_direct_convolution_2_vectorized_nchw_kcyx_nkhw.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
#include <unistd.h>
|
||||
#include "device.hpp"
|
||||
#include "gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
|
||||
#include "composable_kernel/kernel_algorithm/gridwise_implicit_gemm_convolution_1_chwn_cyxk_khwn_padded.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
#include <initializer_list>
|
||||
#include <cstdlib>
|
||||
#include <stdlib.h>
|
||||
#include "config.hpp"
|
||||
#include "composable_kernel/utility/config.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "tensor.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "conv_common.hpp"
|
||||
#include "device_convolution_direct_v2_nchw_kcyx_nkhw.hpp"
|
||||
#include "device_convolution_implicit_gemm_v1_chwn_cyxk_khwn.hpp"
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
|
||||
#define CK_GRIDWISE_CONVOLUTION_DIRECT_V2_NCHW_KCYX_NKHW
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_direct_convolution.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R1_CHWN_CYXK_KHWN
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R2_CHWN_CYXK_KHWN
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_3d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_3d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_generic_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V1R3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "threadwise_generic_tensor_op.hpp"
|
||||
#include "blockwise_batched_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_batched_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V2_CHWN_CYXK_KHWN_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V3_NCHW_CYXK_NKHW_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "threadwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
|
||||
#define CK_GRIDWISE_CONVOLUTION_IMPLICIT_GEMM_V4_NCHW_KCYX_NKHW_LDS_DOUBLE_BUFFER
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "threadwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_direct_convolution.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "threadwise_direct_convolution.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_direct_convolution.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_direct_convolution.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "blockwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_2d_tensor_op.hpp"
|
||||
#include "threadwise_4d_tensor_op.hpp"
|
||||
#include "blockwise_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_2d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_4d_tensor_op.hpp"
|
||||
#include "composable_kernel/tensor_operation/blockwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
|
||||
#define CK_CONSTANT_MATRIX_DESCRIPTOR_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
|
||||
#define CK_CONSTANT_MERGED_TENSOR_DESCRIPTOR_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
|
||||
#define CK_CONSTANT_TENSOR_DESCRIPTOR_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_BLOCKWISE_2D_TENSOR_OP_HPP
|
||||
#define CK_BLOCKWISE_2D_TENSOR_OP_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_BLOCKWISE_3D_TENSOR_OP_HPP
|
||||
#define CK_BLOCKWISE_3D_TENSOR_OP_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_BLOCKWISE_4D_TENSOR_OP_HPP
|
||||
#define CK_BLOCKWISE_4D_TENSOR_OP_HPP
|
||||
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_BLOCKWISE_BATCHED_GEMM_HPP
|
||||
#define CK_BLOCKWISE_BATCHED_GEMM_HPP
|
||||
|
||||
#include "threadwise_gemm.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_BLOCKWISE_GEMM_HPP
|
||||
#define CK_BLOCKWISE_GEMM_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "threadwise_gemm.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP
|
||||
#define CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_HPP
|
||||
|
||||
#include "threadwise_generic_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_generic_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP
|
||||
#define CK_BLOCKWISE_TENSOR_SLICE_COPY_HPP
|
||||
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_THREADWISE_4D_TENSOR_OP_HPP
|
||||
#define CK_THREADWISE_4D_TENSOR_OP_HPP
|
||||
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_THREADWISE_DIRECT_CONVOLUTION_HPP
|
||||
#define CK_THREADWISE_DIRECT_CONVOLUTION_HPP
|
||||
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "threadwise_tensor_slice_copy.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_operation/threadwise_tensor_slice_copy.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_THREADWISE_GEMM_HPP
|
||||
#define CK_THREADWISE_GEMM_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "ConstantMatrixDescriptor.hpp"
|
||||
#include "composable_kernel/utility/common.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMatrixDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_THREADWISE_GENERIC_TENSOR_OP_HPP
|
||||
#define CK_THREADWISE_GENERIC_TENSOR_OP_HPP
|
||||
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
template <class Float, class TDesc>
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP
|
||||
#define CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_HPP
|
||||
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "ConstantMergedTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantMergedTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_THREADWISE_TENSOR_SLICE_COPY_HPP
|
||||
#define CK_THREADWISE_TENSOR_SLICE_COPY_HPP
|
||||
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_ARRAY_HPP
|
||||
#define CK_ARRAY_HPP
|
||||
|
||||
#include "Sequence.hpp"
|
||||
#include "functional2.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
#include "composable_kernel/utility/functional2.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_SEQUENCE_HPP
|
||||
#define CK_SEQUENCE_HPP
|
||||
|
||||
#include "integral_constant.hpp"
|
||||
#include "functional.hpp"
|
||||
#include "composable_kernel/utility/integral_constant.hpp"
|
||||
#include "composable_kernel/utility/functional.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_AMD_INLINE_ASM_HPP
|
||||
#define CK_AMD_INLINE_ASM_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "composable_kernel/utility/vector_type.hpp"
|
||||
|
||||
#define NO_VM_WAIT 0
|
||||
#define NO_LGKM_WAIT 0
|
||||
17
include/composable_kernel/utility/common.hpp
Normal file
17
include/composable_kernel/utility/common.hpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#ifndef CK_COMMON_HPP
|
||||
#define CK_COMMON_HPP
|
||||
|
||||
#include "composable_kernel/utility/utility.hpp"
|
||||
#include "composable_kernel/utility/vector_type.hpp"
|
||||
#include "composable_kernel/utility/integral_constant.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
#include "composable_kernel/utility/Array.hpp"
|
||||
#include "composable_kernel/utility/functional.hpp"
|
||||
#include "composable_kernel/utility/functional2.hpp"
|
||||
#include "composable_kernel/utility/functional3.hpp"
|
||||
|
||||
#if CK_USE_AMD_INLINE_ASM
|
||||
#include "composable_kernel/utility/amd_inline_asm.hpp"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_FUNCTIONAL_HPP
|
||||
#define CK_FUNCTIONAL_HPP
|
||||
|
||||
#include "integral_constant.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "composable_kernel/utility/integral_constant.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -38,7 +38,7 @@ struct static_if<true>
|
||||
__host__ __device__ constexpr auto operator()(F f) const
|
||||
{
|
||||
// This is a trick for compiler:
|
||||
// Pass forwarder to lambda "f" as "auto" argument, and maks sure "f" will use it,
|
||||
// Pass forwarder to lambda "f" as "auto" argument, and make sure "f" will use it,
|
||||
// this will make "f" a generic lambda, so that "f" won't be compiled until being
|
||||
// instantiated here
|
||||
f(forwarder{});
|
||||
@@ -67,7 +67,7 @@ struct static_if<false>
|
||||
__host__ __device__ static constexpr auto Else(F f)
|
||||
{
|
||||
// This is a trick for compiler:
|
||||
// Pass forwarder to lambda "f" as "auto" argument, and maks sure "f" will use it,
|
||||
// Pass forwarder to lambda "f" as "auto" argument, and make sure "f" will use it,
|
||||
// this will make "f" a generic lambda, so that "f" won't be compiled until being
|
||||
// instantiated here
|
||||
f(forwarder{});
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_FUNCTIONAL2_HPP
|
||||
#define CK_FUNCTIONAL2_HPP
|
||||
|
||||
#include "functional.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "composable_kernel/utility/functional.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
#ifndef CK_FUNCTIONAL3_HPP
|
||||
#define CK_FUNCTIONAL3_HPP
|
||||
|
||||
#include "functional.hpp"
|
||||
#include "functional2.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "Array.hpp"
|
||||
#include "composable_kernel/utility/functional.hpp"
|
||||
#include "composable_kernel/utility/functional2.hpp"
|
||||
#include "composable_kernel/utility/Sequence.hpp"
|
||||
#include "composable_kernel/utility/Array.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef CK_BASE_HPP
|
||||
#define CK_BASE_HPP
|
||||
#ifndef CK_UTILITY_HPP
|
||||
#define CK_UTILITY_HPP
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -25,7 +25,8 @@ __host__ __device__ constexpr bool is_same_type(X, Y)
|
||||
return is_same<X, Y>::value;
|
||||
}
|
||||
|
||||
namespace math { // namespace math
|
||||
namespace math {
|
||||
|
||||
template <class T, T s>
|
||||
struct scales
|
||||
{
|
||||
@@ -106,7 +107,7 @@ __host__ __device__ constexpr T min(T x, Ts... xs)
|
||||
}
|
||||
|
||||
// this is wrong
|
||||
// TODO: implement correct least common multiple, instead of calling max()
|
||||
// TODO: implement least common multiple properly, instead of calling max()
|
||||
template <class T, class... Ts>
|
||||
__host__ __device__ constexpr T lcm(T x, Ts... xs)
|
||||
{
|
||||
@@ -1,8 +1,8 @@
|
||||
#ifndef CK_VECTOR_TYPE_HPP
|
||||
#define CK_VECTOR_TYPE_HPP
|
||||
|
||||
#include "config.hpp"
|
||||
#include "integral_constant.hpp"
|
||||
#include "composable_kernel/utility/config.hpp"
|
||||
#include "composable_kernel/utility/integral_constant.hpp"
|
||||
|
||||
namespace ck {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CK_CONV_COMMON_HPP
|
||||
#define CK_CONV_COMMON_HPP
|
||||
|
||||
#include "ConstantTensorDescriptor.hpp"
|
||||
#include "composable_kernel/tensor_description/ConstantTensorDescriptor.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#define CK_DEVICE_HPP
|
||||
|
||||
#include <memory>
|
||||
#include "config.hpp"
|
||||
#include "composable_kernel/utility/config.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
#ifndef CK_GRIDWISE_CONVOLUTION_KERNEL_WRAPPER
|
||||
#define CK_GRIDWISE_CONVOLUTION_KERNEL_WRAPPER
|
||||
|
||||
namespace ck {
|
||||
|
||||
template <class GridwiseConvolution, class T>
|
||||
__global__ void run_gridwise_convolution_kernel(const T* const __restrict__ p_in_global,
|
||||
const T* const __restrict__ p_wei_global,
|
||||
@@ -11,6 +9,4 @@ __global__ void run_gridwise_convolution_kernel(const T* const __restrict__ p_in
|
||||
GridwiseConvolution{}.Run(p_in_global, p_wei_global, p_out_global);
|
||||
}
|
||||
|
||||
} // namespace ck
|
||||
|
||||
#endif
|
||||
@@ -1,4 +1,4 @@
|
||||
configure_file("${PROJECT_SOURCE_DIR}/src/include/config.hpp.in" "${PROJECT_BINARY_DIR}/src/include/config.hpp")
|
||||
configure_file("${PROJECT_SOURCE_DIR}/include/composable_kernel/utility/config.hpp.in" "${PROJECT_BINARY_DIR}/include/composable_kernel/utility/config.hpp")
|
||||
|
||||
set(TENSOR_SOURCE
|
||||
tensor.cpp;
|
||||
@@ -9,7 +9,6 @@ add_library(tensor SHARED ${TENSOR_SOURCE})
|
||||
target_compile_features(tensor PUBLIC)
|
||||
set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
|
||||
if(DEVICE_BACKEND STREQUAL "CUDA")
|
||||
target_link_libraries(tensor nvToolsExt cudart)
|
||||
endif()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "config.hpp"
|
||||
#include "composable_kernel/utility/config.hpp"
|
||||
#include "device.hpp"
|
||||
|
||||
DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
#ifndef CK_COMMON_HPP
|
||||
#define CK_COMMON_HPP
|
||||
|
||||
#include "utility.hpp"
|
||||
#include "vector_type.hpp"
|
||||
#include "integral_constant.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "Array.hpp"
|
||||
#include "functional.hpp"
|
||||
#include "functional2.hpp"
|
||||
#include "functional3.hpp"
|
||||
|
||||
#if CK_USE_AMD_INLINE_ASM
|
||||
#include "amd_inline_asm.hpp"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user