From e17b495dff05a641e6a15efdb92b603b2f111a8a Mon Sep 17 00:00:00 2001 From: Chao Liu Date: Fri, 24 May 2019 16:43:29 -0500 Subject: [PATCH] refactor --- ...p.hpp => blockwise_generic_tensor_slice_op.hip.hpp} | 10 +++++----- ..._gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp | 2 +- ...nvolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp | 2 +- ...convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp | 8 ++++---- src/include/threadwise_tensor_slice_op.hip.hpp | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) rename src/include/{blockwise_merged_tensor_slice_op.hip.hpp => blockwise_generic_tensor_slice_op.hip.hpp} (96%) diff --git a/src/include/blockwise_merged_tensor_slice_op.hip.hpp b/src/include/blockwise_generic_tensor_slice_op.hip.hpp similarity index 96% rename from src/include/blockwise_merged_tensor_slice_op.hip.hpp rename to src/include/blockwise_generic_tensor_slice_op.hip.hpp index 7a534feda4..f88cf538e8 100644 --- a/src/include/blockwise_merged_tensor_slice_op.hip.hpp +++ b/src/include/blockwise_generic_tensor_slice_op.hip.hpp @@ -14,7 +14,7 @@ template -struct BlockwiseTensorSliceCopy_generic_v1 +struct BlockwiseGenericTensorSliceCopy_v1 { static constexpr index_t nDim = SrcDesc::GetNumOfDimension(); @@ -22,8 +22,8 @@ struct BlockwiseTensorSliceCopy_generic_v1 index_t mDstMyThreadOffset; __device__ - BlockwiseTensorSliceCopy_generic_v1(Array src_block_data_multi_id_begin, - Array dst_block_data_multi_id_begin) + BlockwiseGenericTensorSliceCopy_v1(Array src_block_data_multi_id_begin, + Array dst_block_data_multi_id_begin) { // check NDim consistent static_assert(nDim == SrcDesc::GetNumOfDimension() && @@ -155,7 +155,7 @@ struct BlockwiseTensorSliceCopy_generic_v1 const index_t clipboard_offset = thread_tensor_desc.GetOffsetFromMultiIndex( clipboard_data_multi_id_begin); // cannot not constexpr, why? - threadwise_tensor_slice_copy_generic(SrcDesc{}, + threadwise_generic_tensor_slice_copy(SrcDesc{}, p_src + src_offset + mSrcMyThreadOffset, make_zero_array(), thread_tensor_desc, @@ -193,7 +193,7 @@ struct BlockwiseTensorSliceCopy_generic_v1 const index_t dst_offset = DstDesc{}.GetOffsetFromMultiIndex( dst_data_multi_id_begin); // cannot not constexpr, why? - threadwise_tensor_slice_copy_generic(thread_tensor_desc, + threadwise_generic_tensor_slice_copy(thread_tensor_desc, p_clipboard + clipboard_offset, make_zero_array(), DstDesc{}, diff --git a/src/include/gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp b/src/include/gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp index b217f79548..e8aea0cb4f 100644 --- a/src/include/gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp +++ b/src/include/gridwise_convolution_implicit_gemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw.hip.hpp @@ -474,7 +474,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_lds_double_buffer_nchw_cyxk_nkhw map_out_global2thread, Number{}); #else - threadwise_tensor_slice_copy_generic( + threadwise_generic_tensor_slice_copy( out_10d_thread_desc.ReorderGivenNew2Old(map_out_global2thread), p_out_thread, make_zero_array(), diff --git a/src/include/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp b/src/include/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp index 444622e84e..5f731fe393 100644 --- a/src/include/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp +++ b/src/include/gridwise_convolution_implicit_gemm_v1r3_nchw_cyxk_nkhw.hip.hpp @@ -423,7 +423,7 @@ struct GridwiseConvolutionImplicitGemm_v1r3_nchw_cyxk_nkhw map_out_global2thread, Number{}); #else - threadwise_tensor_slice_copy_generic( + threadwise_generic_tensor_slice_copy( out_10d_thread_desc.ReorderGivenNew2Old(map_out_global2thread), p_out_thread, make_zero_array(), diff --git a/src/include/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp b/src/include/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp index c59ca57aad..5c924dd67a 100644 --- a/src/include/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp +++ b/src/include/gridwise_convolution_implicit_gemm_v3_nchw_cyxk_nkhw.hip.hpp @@ -3,7 +3,7 @@ #include "ConstantTensorDescriptor.hip.hpp" #include "ConstantMergedTensorDescriptor.hip.hpp" #include "ConstantMatrixDescriptor.hip.hpp" -#include "blockwise_merged_tensor_slice_op.hip.hpp" +#include "blockwise_generic_tensor_slice_op.hip.hpp" #include "blockwise_gemm.hip.hpp" #include "threadwise_tensor_slice_op.hip.hpp" @@ -123,7 +123,7 @@ struct GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw // input blockwise copy // slice a merged tensor, reorder and copy to a normal tensor // this copy operator already has blockwise offset built-in - const auto blockwise_in_copy = BlockwiseTensorSliceCopy_generic_v1< + const auto blockwise_in_copy = BlockwiseGenericTensorSliceCopy_v1< BlockSize, Float, decltype(in_c_n1_b_n2_global_merged_desc), @@ -152,7 +152,7 @@ struct GridwiseConvolutionImplicitGemm_v3_nchw_cyxk_nkhw // this copy operator already have blockwise offset built-in const auto blockwise_wei_copy = #if 0 - BlockwiseTensorSliceCopy_generic_v1 -__device__ void threadwise_tensor_slice_copy_generic( +__device__ void threadwise_generic_tensor_slice_copy( SrcDesc, const Float* __restrict__ p_src, Array src_multi_id_begin,