Do more benchmarking

This commit is contained in:
Chao Liu
2019-06-26 21:43:26 -05:00
parent 35269cf77a
commit 85ae70d3d3
3 changed files with 45 additions and 936 deletions

View File

@@ -59,7 +59,7 @@ void device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw(InDesc,
constexpr index_t B = (N * Ho * Wo) / (N1 * N2);
-#if 0
+#if 1
constexpr index_t BlockSize = 256;
constexpr index_t BPerBlock = 16;
@@ -93,7 +93,7 @@ void device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw(InDesc,
constexpr index_t WeiBlockCopySrcDataPerRead_E = 4;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
-#elif 1
+#elif 0
constexpr index_t BlockSize = 256;
constexpr index_t BPerBlock = 16;