From 4543d17a7191c16431e0b26876e59e6c35a2a692 Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Tue, 19 Feb 2019 22:07:15 -0600 Subject: [PATCH] refactor --- .../device_implicit_gemm_convolution_1_chwn_csrk_khwn.hpp | 3 --- driver/driver.hip.cpp | 2 +- ...ise_implicit_gemm_convolution_1_chwn_csrk_khwn.hip.hpp | 8 +++++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/driver/device_implicit_gemm_convolution_1_chwn_csrk_khwn.hpp b/driver/device_implicit_gemm_convolution_1_chwn_csrk_khwn.hpp index 81fef7dcbd..14e4d29f74 100644 --- a/driver/device_implicit_gemm_convolution_1_chwn_csrk_khwn.hpp +++ b/driver/device_implicit_gemm_convolution_1_chwn_csrk_khwn.hpp @@ -227,9 +227,6 @@ void device_implicit_gemm_convolution_1_chwn_csrk_khwn(InDesc, constexpr unsigned HoPerThread = 1; constexpr unsigned WoPerThread = 1; - constexpr unsigned WeiBlockCopyThreadPerDim0 = 4; - constexpr unsigned WeiBlockCopyThreadPerDim1 = 32; - constexpr unsigned InBlockCopy_ThreadPerDimC = 8; constexpr unsigned InBlockCopy_ThreadPerDimH = 2; constexpr unsigned InBlockCopy_ThreadPerDimW = 2; diff --git a/driver/driver.hip.cpp b/driver/driver.hip.cpp index 77be2059a3..cc234f5091 100644 --- a/driver/driver.hip.cpp +++ b/driver/driver.hip.cpp @@ -491,7 +491,7 @@ int main(int argc, char* argv[]) constexpr unsigned HPad = 1; constexpr unsigned WPad = 1; -#elif 1 +#elif 0 // 1x1 filter, 28x28 image constexpr unsigned N = 16; constexpr unsigned C = 256; diff --git a/src/include/gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn.hip.hpp b/src/include/gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn.hip.hpp index cc8c08e8d5..f11be63954 100644 --- a/src/include/gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn.hip.hpp +++ b/src/include/gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn.hip.hpp @@ -94,8 +94,8 @@ gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn(const Float* const __restric // tensor view of blockwise input and weight in LDS // be careful of alignment - constexpr auto in_chwn_block_desc = - make_ConstantTensorDescriptor(Sequence{}); + constexpr auto in_chwn_block_desc = make_ConstantTensorDescriptor_aligned( + Sequence{}, Number{}); constexpr auto wei_ek_block_desc = make_ConstantTensorDescriptor_aligned( Sequence{}, Number{}); @@ -164,7 +164,9 @@ gridwise_implicit_gemm_convolution_1_chwn_csrk_khwn(const Float* const __restric HoPerThread>{}; // LDS: be careful of alignment - constexpr unsigned in_block_size = in_chwn_block_desc.GetElementSpace(); + constexpr unsigned in_block_size = + in_chwn_block_desc.GetElementSpace(Number{}); + constexpr unsigned wei_block_size = wei_csrk_block_desc.GetElementSpace(Number{});