clipboard float4 copy and paste C++ code

This commit is contained in:
Chao Liu
2019-04-05 02:13:29 -05:00
parent 605afd0fb6
commit dabfa77fc6
6 changed files with 246 additions and 155 deletions

View File

@@ -238,9 +238,6 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc,
constexpr index_t GemmNLevel1Cluster = 4;
constexpr index_t GemmKPerThreadLoop = 1;
constexpr index_t GemmThreadPerColumnPerCluster = 8;
constexpr index_t GemmThreadPerRowPerCluster = 8;
constexpr index_t InBlockCopyThreadPerDim0 = 4;
constexpr index_t InBlockCopyThreadPerDim1 = 16;
@@ -272,7 +269,7 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc,
for(index_t i = 0; i < nrepeat; ++i)
{
constexpr auto gridwise_conv =
#if 1
#if 0
GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn
#else
GridwiseConvolutionImplicitGemm_v2_chwn_cyxk_khwn_lds_double_buffer
@@ -288,8 +285,6 @@ void device_implicit_gemm_convolution_2_chwn_cyxk_khwn(InDesc,
CPerBlock,
BPerThread,
KPerThread,
GemmThreadPerColumnPerCluster,
GemmThreadPerRowPerCluster,
GemmMPerThreadSubC,
GemmNPerThreadSubC,
GemmMLevel0Cluster,