bug fix: BlockwiseGenericTensorSliceCopy_v2::MoveDstSlicingWindow

This commit is contained in:
Chao Liu
2019-08-15 15:12:13 -05:00
parent 86ceded98b
commit 08bf57b01c
2 changed files with 5 additions and 3 deletions

View File

@@ -125,8 +125,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed(
Sequence<KPerThread, HoPerThread, WoPerThread, NPerThread>{});
// blockwise copy
// input: format is [C, Hi, Wi, N]
// blockwise input copy
// format is [C, Hi, Wi, N]
auto blockwise_in_copy =
BlockwiseGenericTensorSliceCopy_v2<BlockSize,
decltype(in_c_h_w_n_global_desc),

View File

@@ -417,6 +417,8 @@ struct BlockwiseGenericTensorSliceCopy_v1
}
};
// Slice a (normal or merged) tensor and copy it into another (normal or merged) tensor.
// The memory layout (ordering of dimensions) may differ between src and dst.
template <index_t BlockSize,
class SrcDesc,
class DstDesc,
@@ -510,7 +512,7 @@ struct BlockwiseGenericTensorSliceCopy_v2
MoveDstSlicingWindow(T step_sizes,
integral_constant<bool, PositiveDirection> positive_direction)
{
mThreadwiseLoad.MoveDstSlicingWindow(step_sizes, positive_direction);
mThreadwiseStore.MoveDstSlicingWindow(step_sizes, positive_direction);
}
private: