From 33c38e2a2370f13b8da550a07c8c70980942ace6 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Mon, 3 Jun 2019 10:50:25 -0500 Subject: [PATCH] tuned --- ...convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp b/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp index 2b8cb3b7dc..618d330534 100644 --- a/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp +++ b/driver/device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw.hpp @@ -75,14 +75,14 @@ void device_convolution_implicit_gemm_v4_nchw_kcyx_nkhw(InDesc, constexpr index_t InBlockCopySrcDataPerRead_B = 1; constexpr index_t InBlockCopyDstDataPerWrite_N2 = 4; - using WeiBlockCopySubLengths_E_K = Sequence<1, 4>; - using WeiBlockCopyClusterLengths_E_K = Sequence<8, 32>; - using WeiBlockCopyThreadClusterArrangeOrder = Sequence<1, 0>; // [E, K] - using WeiBlockCopySrcAccessOrder = Sequence<1, 0>; // [E, K] - using WeiBlockCopyDstAccessOrder = Sequence<0, 1>; // [K, E] + using WeiBlockCopySubLengths_E_K = Sequence<4, 1>; + using WeiBlockCopyClusterLengths_E_K = Sequence<2, 128>; + using WeiBlockCopyThreadClusterArrangeOrder = Sequence<1, 0>; // [K, E] + using WeiBlockCopySrcAccessOrder = Sequence<1, 0>; // [K, E] + using WeiBlockCopyDstAccessOrder = Sequence<0, 1>; // [E, K] - constexpr index_t WeiBlockCopySrcDataPerRead_E = 1; - constexpr index_t WeiBlockCopyDstDataPerWrite_K = 4; + constexpr index_t WeiBlockCopySrcDataPerRead_E = 4; + constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1; #endif constexpr index_t GridSize =