padding for chwn is functional

This commit is contained in:
Chao Liu
2019-09-12 01:12:08 -05:00
parent 724e984bff
commit 1f70524471
9 changed files with 118 additions and 272 deletions

View File

@@ -408,8 +408,7 @@ struct BlockwiseGenericTensorSliceCopy_v1
template <class T, bool PositiveDirection>
__device__ void
MoveSrcSlicingWindow(T step_sizes,
integral_constant<bool, PositiveDirection> positive_direction)
MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
{
static_for<0, nDim, 1>{}([&](auto idim) {
if(step_sizes[idim] != 0)
@@ -506,18 +505,16 @@ struct BlockwiseGenericTensorSliceCopy_v2
template <class T, bool PositiveDirection>
__device__ void
MoveSrcSlicingWindow(T step_sizes,
integral_constant<bool, PositiveDirection> positive_direction)
MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
{
mThreadwiseLoad.MoveSrcSlicingWindow(step_sizes, positive_direction);
mThreadwiseLoad.MoveSrcSliceWindow(step_sizes, positive_direction);
}
template <class T, bool PositiveDirection>
__device__ void
MoveDstSlicingWindow(T step_sizes,
integral_constant<bool, PositiveDirection> positive_direction)
MoveDstSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
{
mThreadwiseStore.MoveDstSlicingWindow(step_sizes, positive_direction);
mThreadwiseStore.MoveDstSliceWindow(step_sizes, positive_direction);
}
private:
@@ -753,18 +750,16 @@ struct BlockwiseGenericTensorSliceCopy_v4
template <class T, bool PositiveDirection>
__device__ void
MoveSrcSlicingWindow(T step_sizes,
integral_constant<bool, PositiveDirection> positive_direction)
MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
{
mThreadwiseLoad.MoveSrcSlicingWindow(step_sizes, positive_direction);
mThreadwiseLoad.MoveSrcSliceWindow(step_sizes, positive_direction);
}
template <class T, bool PositiveDirection>
__device__ void
MoveDstSlicingWindow(T step_sizes,
integral_constant<bool, PositiveDirection> positive_direction)
MoveDstSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection> positive_direction)
{
mThreadwiseStore.MoveDstSlicingWindow(step_sizes, positive_direction);
mThreadwiseStore.MoveDstSliceWindow(step_sizes, positive_direction);
}
private:

View File

@@ -757,7 +757,7 @@ struct ThreadwiseGenericTensorSliceCopy_v2r1
// T can be Sequence or Array
template <class T, bool PositiveDirection>
__device__ void MoveSrcSlicingWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
__device__ void MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
{
static_if<PositiveDirection>{}([&](auto) {
mSrcSliceOrigin += step_sizes;
@@ -765,7 +765,7 @@ struct ThreadwiseGenericTensorSliceCopy_v2r1
}
template <class T, bool PositiveDirection>
__device__ void MoveDstSlicingWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
__device__ void MoveDstSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
{
static_if<PositiveDirection>{}([&](auto) {
mDstSliceOrigin += step_sizes;
@@ -1045,8 +1045,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
// TODO: still kind of messy
if(!src_coord.IsAnyLevelIndexInPaddingArea())
{
const index_t src_offset =
(mSrcSliceOrigin + (long_vector_data_begin_id + scalar_id)).GetOffset();
const index_t src_offset = src_coord.GetOffset();
const index_t buffer_offset = i * src_data_per_access;
@@ -1073,7 +1072,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
}
template <class T, bool PositiveDirection>
__device__ void MoveSrcSlicingWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
__device__ void MoveSrcSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
{
static_if<PositiveDirection>{}([&](auto) {
mSrcSliceOrigin += step_sizes;
@@ -1081,7 +1080,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
}
template <class T, bool PositiveDirection>
__device__ void MoveDstSlicingWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
__device__ void MoveDstSliceWindow(T step_sizes, integral_constant<bool, PositiveDirection>)
{
static_if<PositiveDirection>{}([&](auto) {
mDstSliceOrigin += step_sizes;