reimplement threadwise copy

[ROCm/composable_kernel commit: fdcfae3a62]
This commit is contained in:
Chao Liu
2019-08-06 17:41:58 -05:00
parent 0319e69e81
commit fb1953ed86
10 changed files with 223 additions and 50 deletions

View File

@@ -379,7 +379,7 @@ int main(int argc, char* argv[])
#elif 0
device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(
(in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
-#elif 0
+#elif 1
device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
in_nchw,
wei_kcyx_desc,