implicit gemm v1r2: only load 1d filter

This commit is contained in:
Chao Liu
2019-04-13 11:19:17 -05:00
parent 96ee9571e2
commit 00899f191b
17 changed files with 426 additions and 142 deletions

View File

@@ -12,7 +12,7 @@ __device__ void threadwise_4d_tensor_pointwise_operation_unary(Desc, Float* __re
constexpr auto desc = Desc{};
#if 0
-if(threadIdx.x == 0)
+if(get_thread_local_1d_id() == 0)
{
print_ConstantTensorDescriptor(desc, "threadwise_4d_tensor_op_unary: ");
}
@@ -218,7 +218,7 @@ __device__ void threadwise_4d_tensor_shift_down(Desc, Float* __restrict__ p, IDi
constexpr auto desc = Desc{};
#if 0
-if(threadIdx.x == 0)
+if(get_thread_local_1d_id() == 0)
{
print_ConstantTensorDescriptor(desc, "threadwise_4d_tensor_shift_down: ");
}