This commit is contained in:
Chao Liu
2019-09-19 23:44:23 -05:00
parent b6e1c52a80
commit bf7e7d62a8
7 changed files with 165 additions and 128 deletions

View File

@@ -51,7 +51,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded(InDesc,
wei_kcyx_device_buf.ToDevice(wei_kcyx.mData.data());
out_nkhw_device_buf.ToDevice(out_nkhw.mData.data());
#if 0
#if 1
// BlockSize = 256, each thread holds 64 data elements
constexpr index_t BlockSize = 256;