From cb6475c77d74f9d9f0a5fb2c0b80d5008fe420da Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Fri, 23 Aug 2019 09:59:23 -0500 Subject: [PATCH] clean --- ...nvolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp | 8 +++----- driver/src/driver.cpp | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp index 19074c80c9..c0c41d53fe 100644 --- a/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp +++ b/composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v1r3_chwn_cyxk_khwn_padded.hpp @@ -138,7 +138,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed( Sequence{}); -#if 0 +#if 1 // blockwise input copy // format is [C, Hi, Wi, N] auto blockwise_in_copy = @@ -180,7 +180,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded {0, 0, 0, 0}); #endif -#if 0 +#if 1 // blockwise wei copy // format is [CPerBlock, KPerBlock] const auto blockwise_wei_copy = @@ -278,7 +278,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded // set threadwise output tensor to 0 threadwise_matrix_set_zero(c_k_wn_thread_mtx_desc, p_out_thread); -#if 0 +#if 1 for(index_t y = 0; y < Y; ++y) { for(index_t x = 0; x < X; ++x) @@ -318,10 +318,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded for(index_t c_block_data_begin = 0; c_block_data_begin < C; c_block_data_begin += CPerBlock) { -#if 1 // debug blockwise_in_copy.Run(); blockwise_wei_copy.Run(); -#endif __syncthreads(); diff --git a/driver/src/driver.cpp b/driver/src/driver.cpp index dd9dffb9f5..670acaee61 100644 --- a/driver/src/driver.cpp +++ b/driver/src/driver.cpp @@ -74,7 +74,7 @@ int main(int argc, char* argv[]) #if 1 constexpr index_t N = 64; - constexpr index_t C = 8; + constexpr index_t C = 1536; constexpr index_t HI = 8; constexpr index_t WI = 8; constexpr index_t K = 256; @@ -368,7 +368,7 @@ int main(int argc, char* argv[]) #if 0 device_convolution_direct_v2_nchw_kcyx_nkhw (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat); -#elif 0 +#elif 1 device_convolution_implicit_gemm_v1_chwn_cyxk_khwn( in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat); #elif 1