mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-12 09:16:52 +00:00
bug fix: BlockwiseGenericTensorSliceCopy_v2::MoveDstSlicingWindow
This commit is contained in:
@@ -125,8 +125,8 @@ struct GridwiseConvolutionImplicitGemm_v1r3_chwn_cyxk_khwn_padded
|
||||
constexpr auto out_k_h_w_n_thread_desc = make_ConstantTensorDescriptor_packed(
|
||||
Sequence<KPerThread, HoPerThread, WoPerThread, NPerThread>{});
|
||||
|
||||
// blockwise copy
|
||||
// input: format is [C, Hi, Wi, N]
|
||||
// blockwise input copy
|
||||
// format is [C, Hi, Wi, N]
|
||||
auto blockwise_in_copy =
|
||||
BlockwiseGenericTensorSliceCopy_v2<BlockSize,
|
||||
decltype(in_c_h_w_n_global_desc),
|
||||
|
||||
@@ -417,6 +417,8 @@ struct BlockwiseGenericTensorSliceCopy_v1
|
||||
}
|
||||
};
|
||||
|
||||
// Slice a (normal or merged) tensor, and copy it into another (normal or merged) tensor
|
||||
// memory layout (ordering of dimensions) can be different between src and dst.
|
||||
template <index_t BlockSize,
|
||||
class SrcDesc,
|
||||
class DstDesc,
|
||||
@@ -510,7 +512,7 @@ struct BlockwiseGenericTensorSliceCopy_v2
|
||||
MoveDstSlicingWindow(T step_sizes,
|
||||
integral_constant<bool, PositiveDirection> positive_direction)
|
||||
{
|
||||
mThreadwiseLoad.MoveDstSlicingWindow(step_sizes, positive_direction);
|
||||
mThreadwiseStore.MoveDstSlicingWindow(step_sizes, positive_direction);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Reference in New Issue
Block a user