implicit gemm v1r2: only load 1d filter

[ROCm/composable_kernel commit: 00899f191b]
This commit is contained in:
Chao Liu
2019-04-13 11:19:17 -05:00
parent 2fc34a9169
commit 8b7eafe959
17 changed files with 426 additions and 142 deletions

View File

@@ -20,7 +20,7 @@ __device__ void threadwise_direct_convolution_1(InDesc,
constexpr auto out_desc = OutDesc{};
#if 0
-if(blockIdx.x == 0 && threadIdx.x == 0)
+if(blockIdx.x == 0 && get_thread_local_1d_id() == 0)
{
print_ConstantTensorDescriptor(in_desc, "threadwise_direct_convolution: in_desc: ");
print_ConstantTensorDescriptor(wei_desc, "threadwise_direct_convolution: wei_desc: ");