Tweaked parameters for direct convolution; added a dummy Winograd implementation

This commit is contained in:
Chao Liu
2018-11-24 03:55:36 -06:00
parent dbffe05a98
commit 8732ea04fb
8 changed files with 529 additions and 23 deletions

View File

@@ -214,8 +214,7 @@ __global__ void gridwise_direct_convolution_2(InGlobalDesc,
__syncthreads();
for(unsigned c_thread_data_offset = 0; c_thread_data_offset < CPerBlock;
c_thread_data_offset += CPerThread)
for(unsigned c_thread_data = 0; c_thread_data < CPerBlock; c_thread_data += CPerThread)
{
// copy input tensor into register
threadwise_4d_tensor_op_binary<TFloat,
@@ -224,7 +223,7 @@ __global__ void gridwise_direct_convolution_2(InGlobalDesc,
decltype(f_copy)>(
in_thread_block_desc,
p_in_block + in_block_desc.Get1dIndex(n_thread_data_offset,
c_thread_data_offset,
c_thread_data,
hi_thread_data_offset,
wi_thread_data_offset),
in_thread_desc,
@@ -237,8 +236,7 @@ __global__ void gridwise_direct_convolution_2(InGlobalDesc,
decltype(wei_thread_desc),
decltype(f_copy)>(
wei_thread_block_desc,
p_wei_block +
wei_block_desc.Get1dIndex(k_thread_data_offset, c_thread_data_offset, 0, 0),
p_wei_block + wei_block_desc.Get1dIndex(k_thread_data_offset, c_thread_data, 0, 0),
wei_thread_desc,
p_wei_thread,
f_copy);
@@ -269,4 +267,4 @@ __global__ void gridwise_direct_convolution_2(InGlobalDesc,
ho_block_data_offset + ho_thread_data_offset,
wo_block_data_offset + wo_thread_data_offset),
f_copy);
}
}