Refactor threadwise copy using sfcurve (#101)

* add space_filling_curve

* cleanup and move space_filling_curve into test

* WIP: start refactoring threadwise_transfer_v1r3

* threadwise_copy works but needs further refactoring

* add some comments

* add SpaceFillingCurve::GetIndices()

* minor changes

* removed GetIndices; refactored GetDstCoordinateResetStep

* add DynamicBuffer::Transfer, but Add is not tested

* rebased against develop

* threadwise_copy_v6r1/v6r2/v6r3 using space-filling curve start to work

* minor changes

* refactored threadcopy v3r1, v2; removed old implementations

* clang-format

* cleanup

* fix a typo in v6r3

* format

Co-authored-by: Chao Liu <chao.liu2@amd.com>
This commit is contained in:
Jianfeng Yan
2022-03-04 00:11:50 -06:00
committed by GitHub
parent c254e5abd2
commit 0619ebf70b
12 changed files with 290 additions and 1269 deletions

View File

@@ -29,9 +29,9 @@ void traverse_using_space_filling_curve()
constexpr auto I1 = Number<1>{};
constexpr auto I2 = Number<2>{};
using TensorLengths = Sequence<4, 10, 9>;
using TensorLengths = Sequence<16, 10, 9>;
using DimAccessOrder = Sequence<2, 0, 1>;
using ScalarsPerAccess = Sequence<1, 2, 3>;
using ScalarsPerAccess = Sequence<4, 2, 3>;
using SpaceFillingCurve = SpaceFillingCurve<TensorLengths, DimAccessOrder, ScalarsPerAccess>;
constexpr auto expected = make_tuple(make_tuple(0, 0, 0),
@@ -39,36 +39,36 @@ void traverse_using_space_filling_curve()
make_tuple(0, 4, 0),
make_tuple(0, 6, 0),
make_tuple(0, 8, 0),
make_tuple(1, 8, 0),
make_tuple(1, 6, 0),
make_tuple(1, 4, 0),
make_tuple(1, 2, 0),
make_tuple(1, 0, 0),
make_tuple(2, 0, 0),
make_tuple(2, 2, 0),
make_tuple(2, 4, 0),
make_tuple(2, 6, 0),
make_tuple(2, 8, 0),
make_tuple(3, 8, 0),
make_tuple(3, 6, 0),
make_tuple(3, 4, 0),
make_tuple(3, 2, 0),
make_tuple(3, 0, 0),
make_tuple(3, 0, 3),
make_tuple(3, 2, 3),
make_tuple(3, 4, 3),
make_tuple(3, 6, 3),
make_tuple(3, 8, 3),
make_tuple(2, 8, 3),
make_tuple(2, 6, 3),
make_tuple(2, 4, 3),
make_tuple(2, 2, 3),
make_tuple(2, 0, 3),
make_tuple(1, 0, 3),
make_tuple(1, 2, 3),
make_tuple(1, 4, 3),
make_tuple(1, 6, 3),
make_tuple(1, 8, 3),
make_tuple(4, 8, 0),
make_tuple(4, 6, 0),
make_tuple(4, 4, 0),
make_tuple(4, 2, 0),
make_tuple(4, 0, 0),
make_tuple(8, 0, 0),
make_tuple(8, 2, 0),
make_tuple(8, 4, 0),
make_tuple(8, 6, 0),
make_tuple(8, 8, 0),
make_tuple(12, 8, 0),
make_tuple(12, 6, 0),
make_tuple(12, 4, 0),
make_tuple(12, 2, 0),
make_tuple(12, 0, 0),
make_tuple(12, 0, 3),
make_tuple(12, 2, 3),
make_tuple(12, 4, 3),
make_tuple(12, 6, 3),
make_tuple(12, 8, 3),
make_tuple(8, 8, 3),
make_tuple(8, 6, 3),
make_tuple(8, 4, 3),
make_tuple(8, 2, 3),
make_tuple(8, 0, 3),
make_tuple(4, 0, 3),
make_tuple(4, 2, 3),
make_tuple(4, 4, 3),
make_tuple(4, 6, 3),
make_tuple(4, 8, 3),
make_tuple(0, 8, 3),
make_tuple(0, 6, 3),
make_tuple(0, 4, 3),
@@ -79,21 +79,21 @@ void traverse_using_space_filling_curve()
make_tuple(0, 4, 6),
make_tuple(0, 6, 6),
make_tuple(0, 8, 6),
make_tuple(1, 8, 6),
make_tuple(1, 6, 6),
make_tuple(1, 4, 6),
make_tuple(1, 2, 6),
make_tuple(1, 0, 6),
make_tuple(2, 0, 6),
make_tuple(2, 2, 6),
make_tuple(2, 4, 6),
make_tuple(2, 6, 6),
make_tuple(2, 8, 6),
make_tuple(3, 8, 6),
make_tuple(3, 6, 6),
make_tuple(3, 4, 6),
make_tuple(3, 2, 6),
make_tuple(3, 0, 6));
make_tuple(4, 8, 6),
make_tuple(4, 6, 6),
make_tuple(4, 4, 6),
make_tuple(4, 2, 6),
make_tuple(4, 0, 6),
make_tuple(8, 0, 6),
make_tuple(8, 2, 6),
make_tuple(8, 4, 6),
make_tuple(8, 6, 6),
make_tuple(8, 8, 6),
make_tuple(12, 8, 6),
make_tuple(12, 6, 6),
make_tuple(12, 4, 6),
make_tuple(12, 2, 6),
make_tuple(12, 0, 6));
constexpr index_t num_accesses = SpaceFillingCurve::GetNumOfAccess();