do more benchmark

[ROCm/composable_kernel commit: 85ae70d3d3]
This commit is contained in:
Chao Liu
2019-06-26 21:43:26 -05:00
parent f9dd497fc9
commit c37a237f00
3 changed files with 45 additions and 936 deletions

View File

@@ -59,7 +59,7 @@ void device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw(InDesc,
constexpr index_t B = (N * Ho * Wo) / (N1 * N2);
-#if 0
+#if 1
constexpr index_t BlockSize = 256;
constexpr index_t BPerBlock = 16;
@@ -93,7 +93,7 @@ void device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw(InDesc,
constexpr index_t WeiBlockCopySrcDataPerRead_E = 4;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
-#elif 1
+#elif 0
constexpr index_t BlockSize = 256;
constexpr index_t BPerBlock = 16;