diff --git a/include/ck_tile/core/container/container_helper.hpp b/include/ck_tile/core/container/container_helper.hpp index 90579c0034..699f0c8a65 100644 --- a/include/ck_tile/core/container/container_helper.hpp +++ b/include/ck_tile/core/container/container_helper.hpp @@ -39,7 +39,7 @@ CK_TILE_HOST_DEVICE constexpr auto container_reorder_given_new2old(const array& old_array, sequence /*new2old*/) { static_assert(NSize == sizeof...(IRs), "wrong! size not consistent"); - static_assert(is_valid_sequence_map>{}, "wrong! invalid reorder map"); + static_assert(is_valid_sequence_map>::value, "wrong! invalid reorder map"); return make_array>(old_array[IRs]...); } @@ -89,7 +89,7 @@ CK_TILE_HOST_DEVICE constexpr auto container_reorder_given_new2old(const tuple>{}, "wrong! invalid reorder map"); + static_assert(is_valid_sequence_map>::value, "wrong! invalid reorder map"); return make_tuple(old_tuple[number{}]...); } @@ -109,7 +109,7 @@ CK_TILE_HOST_DEVICE constexpr auto container_reorder_given_new2old(sequence>{}, "wrong! invalid reorder map"); + static_assert(is_valid_sequence_map>::value, "wrong! invalid reorder map"); return sequence::at(number{})...>{}; } @@ -120,7 +120,7 @@ CK_TILE_HOST_DEVICE constexpr auto container_reorder_given_old2new(sequence>{}, "wrong! invalid reorder map"); + static_assert(is_valid_sequence_map>::value, "wrong! invalid reorder map"); constexpr auto new2old = typename sequence_map_inverse>::type{}; diff --git a/include/ck_tile/core/container/sequence.hpp b/include/ck_tile/core/container/sequence.hpp index 35858bf75e..73ce09b20e 100644 --- a/include/ck_tile/core/container/sequence.hpp +++ b/include/ck_tile/core/container/sequence.hpp @@ -144,9 +144,11 @@ struct sequence static_assert(MapOld2New::size() == size(), "wrong! reorder map should have the same size as sequence to be rerodered"); - static_assert(is_valid_sequence_map::value, "wrong! invalid reorder map"); + static_assert(is_valid_sequence_map>::value, + "wrong! invalid reorder map"); - return reorder_new_to_old(typename sequence_map_inverse::type{}); + return reorder_new_to_old( + typename sequence_map_inverse>::type{}); } CK_TILE_HOST_DEVICE static constexpr auto reverse() @@ -548,163 +550,59 @@ struct sequence_reduce }; #endif -template -struct sequence_sort_impl +// Sorts a sequence using constexpr insertion sort. O(1) template instantiation +// depth, replacing the recursive merge sort that created O(N log N) intermediate types. +namespace detail { + +template +struct sequence_sort_helper; + +template +struct sequence_sort_helper, Compare, sequence> { - template - struct sorted_sequence_merge_impl + struct sort_result { - static constexpr bool choose_left = LeftValues::front() < RightValues::front(); - - static constexpr index_t chosen_value = - choose_left ? LeftValues::front() : RightValues::front(); - static constexpr index_t chosen_id = choose_left ? LeftIds::front() : RightIds::front(); - - using new_merged_values = decltype(MergedValues::push_back(number{})); - using new_merged_ids = decltype(MergedIds::push_back(number{})); - - using new_left_values = typename std:: - conditional::type; - using new_left_ids = - typename std::conditional::type; - - using new_right_values = typename std:: - conditional::type; - using new_right_ids = - typename std::conditional::type; - - using merge = sorted_sequence_merge_impl; - // this is output - using merged_values = typename merge::merged_values; - using merged_ids = typename merge::merged_ids; + static_array values; + static_array ids; }; - template - struct sorted_sequence_merge_impl, - sequence<>, - MergedValues, - MergedIds, - Comp> + static constexpr sort_result compute() { - using merged_values = typename sequence_merge::type; - using merged_ids = typename sequence_merge::type; - }; + constexpr index_t n = sizeof...(Vs); + sort_result r{{{Vs...}}, {{Idx...}}}; + // insertion sort — O(N^2) constexpr steps, O(1) template depth + for(index_t i = 1; i < n; ++i) + { + for(index_t j = i; j > 0 && Compare{}(r.values[j], r.values[j - 1]); --j) + { + auto tv = r.values[j]; + r.values[j] = r.values[j - 1]; + r.values[j - 1] = tv; + auto ti = r.ids[j]; + r.ids[j] = r.ids[j - 1]; + r.ids[j - 1] = ti; + } + } + return r; + } - template - struct sorted_sequence_merge_impl, - sequence<>, - RightValues, - RightIds, - MergedValues, - MergedIds, - Comp> - { - using merged_values = typename sequence_merge::type; - using merged_ids = typename sequence_merge::type; - }; - - template - struct sorted_sequence_merge - { - using merge = sorted_sequence_merge_impl, - sequence<>, - Comp>; - - using merged_values = typename merge::merged_values; - using merged_ids = typename merge::merged_ids; - }; - - static constexpr index_t nsize = Values::size(); - - using split_unsorted_values = sequence_split; - using split_unsorted_ids = sequence_split; - - using left_unsorted_values = typename split_unsorted_values::left_type; - using left_unsorted_ids = typename split_unsorted_ids::left_type; - using left_sort = sequence_sort_impl; - using left_sorted_values = typename left_sort::sorted_values; - using left_sorted_ids = typename left_sort::sorted_ids; - - using right_unsorted_values = typename split_unsorted_values::right_type; - using right_unsorted_ids = typename split_unsorted_ids::right_type; - using right_sort = sequence_sort_impl; - using right_sorted_values = typename right_sort::sorted_values; - using right_sorted_ids = typename right_sort::sorted_ids; - - using merged_sorted = sorted_sequence_merge; - - using sorted_values = typename merged_sorted::merged_values; - using sorted_ids = typename merged_sorted::merged_ids; + static constexpr sort_result sorted = compute(); + using sorted_values = sequence; + using sorted_ids = sequence; }; -template -struct sequence_sort_impl, sequence, Compare> -{ - static constexpr bool choose_x = Compare{}(ValueX, ValueY); - - using sorted_values = typename std:: - conditional, sequence>::type; - using sorted_ids = - typename std::conditional, sequence>::type; -}; - -template -struct sequence_sort_impl, sequence, Compare> -{ - using sorted_values = sequence; - using sorted_ids = sequence; -}; - -template -struct sequence_sort_impl, sequence<>, Compare> -{ - using sorted_values = sequence<>; - using sorted_ids = sequence<>; -}; +} // namespace detail template struct sequence_sort { - using unsorted_ids = typename arithmetic_sequence_gen<0, Values::size(), 1>::type; - using sort = sequence_sort_impl; + static constexpr index_t n = Values::size(); + using idx_seq = make_index_sequence; - // this is output - using type = typename sort::sorted_values; - using sorted2unsorted_map = typename sort::sorted_ids; + using helper = detail::sequence_sort_helper, Compare, idx_seq>; + + using type = typename helper::sorted_values; + using sorted2unsorted_map = typename helper::sorted_ids; }; template @@ -782,10 +680,42 @@ struct sequence_unique_sort using sorted2unsorted_map = typename uniquify::uniquified_ids; }; +// Validates that a sequence is a permutation of {0, 1, ..., N-1}. +// Uses a constexpr loop instead of instantiating sequence_sort. +namespace detail { + +template +constexpr bool check_valid_sequence_map() +{ + constexpr index_t n = sizeof...(Is); + if constexpr(n == 0) + { + return true; + } + else + { + constexpr index_t vals[] = {Is...}; + static_array seen{}; + for(index_t i = 0; i < n; ++i) + { + if(vals[i] < 0 || vals[i] >= n || seen[vals[i]]) + return false; + seen[vals[i]] = true; + } + return true; + } +} + +} // namespace detail + template -struct is_valid_sequence_map - : std::is_same::type, - typename sequence_sort>::type> +struct is_valid_sequence_map : std::false_type +{ +}; + +template +struct is_valid_sequence_map> + : std::integral_constant()> { }; diff --git a/include/ck_tile/core/tensor/tensor_adaptor.hpp b/include/ck_tile/core/tensor/tensor_adaptor.hpp index e6cdb66ef9..56c62a29ee 100644 --- a/include/ck_tile/core/tensor/tensor_adaptor.hpp +++ b/include/ck_tile/core/tensor/tensor_adaptor.hpp @@ -376,9 +376,10 @@ CK_TILE_HOST_DEVICE constexpr auto make_single_stage_tensor_adaptor(const Transf constexpr auto all_up_dim_new_top_ids = unpack( [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionNewTopIdss{}); - static_assert(is_valid_sequence_map::value && - is_valid_sequence_map::value, - "wrong!"); + static_assert( + is_valid_sequence_map>::value && + is_valid_sequence_map>::value, + "wrong!"); constexpr index_t ndim_old_top = all_low_dim_old_top_ids.size(); constexpr index_t ndim_new_top = all_up_dim_new_top_ids.size(); @@ -443,8 +444,8 @@ transform_tensor_adaptor(const OldTensorAdaptor& old_tensor_adaptor, constexpr auto all_new_top_ids = unpack([](auto... xs) { return merge_sequences(xs...); }, NewUpperDimensionNewTopIdss{}); - static_assert(is_valid_sequence_map::value && - is_valid_sequence_map::value, + static_assert(is_valid_sequence_map>::value && + is_valid_sequence_map>::value, "wrong!"); } diff --git a/include/ck_tile/core/utility/functional.hpp b/include/ck_tile/core/utility/functional.hpp index ae79d575a8..032be236b6 100644 --- a/include/ck_tile/core/utility/functional.hpp +++ b/include/ck_tile/core/utility/functional.hpp @@ -135,65 +135,147 @@ struct idx_identity namespace detail { -// RemainLengths: sequence<...> -// Orders: sequence<...> -template -struct static_ford_impl +// Computes the inverse of a permutation as a constexpr array. +// Avoids the sequence_map_inverse -> is_valid_sequence_map -> sequence_sort chain. +template +struct inverse_perm; + +template +struct inverse_perm> { - CK_TILE_HOST_DEVICE constexpr static_ford_impl() + static constexpr auto compute() { - static_assert(RemainLengths::size() > 0, "wrong! should not get here"); + constexpr index_t n = sizeof...(Ps); + static_array result{}; + constexpr index_t input[] = {Ps...}; + for(index_t i = 0; i < n; ++i) + { + result[input[i]] = i; + } + return result; + } + static constexpr auto value = compute(); +}; + +// Decomposes a linear index into multi-dimensional indices using pre-computed +// strides. Uses a single flat static_for instead of recursive nesting, which +// eliminates intermediate lambda closure instantiations. +template +struct index_decomposer; + +template +struct index_decomposer, sequence> +{ + static constexpr index_t n_dim = sizeof...(Ls); + static constexpr static_array lengths = {{Ls...}}; + + static constexpr static_array compute_all_strides() + { + static_array result{}; + if constexpr(n_dim > 0) + { + result[n_dim - 1] = 1; + for(index_t i = n_dim - 1; i > 0; --i) + { + result[i - 1] = result[i] * lengths[i]; + } + } + return result; } - // F signature: F(sequence<...>) - // CurrentOrderedId: sequence<...> - template - CK_TILE_HOST_DEVICE constexpr void operator()(F f, CurrentOrderedId) const + static constexpr static_array strides = compute_all_strides(); + + // Compile-time decomposition: linear index -> sequence of per-dimension indices + template + using decompose = sequence<((LinearIdx / strides[Is]) % lengths[Is])...>; + + // Decompose AND reorder in one step using a pre-computed inverse permutation. + // Produces the unordered multi-index directly, avoiding per-iteration + // reorder_old_to_new member function instantiations on each unique sequence type. + template + using decompose_reordered = sequence<((LinearIdx / strides[inverse_perm::value[Is]]) % + lengths[inverse_perm::value[Is]])...>; +}; + +// Calls f(decompose{}) for each linear index I in the pack, using a single +// fold expression. Bypasses the static_for lambda entirely, eliminating M*N +// intermediate lambda closure instantiations that the lambda-based approach creates. +template +struct ford_applier; + +template +struct ford_applier> +{ + template + CK_TILE_HOST_DEVICE constexpr void operator()(F f) const { - static_for<0, RemainLengths::front(), 1>{}([=](auto I) { - static_ford_impl{}( - f, CurrentOrderedId::push_back(I)); - }); + if constexpr(sizeof...(LinearIds) > 0) + { + (f(typename Decomposer::template decompose{}), ...); + } } }; -template -struct static_ford_impl, Orders> +// Same as ford_applier but applies reordering during decomposition. +template +struct ford_applier_reordered; + +template +struct ford_applier_reordered> { - // F signature: F(sequence<...>) - // OrderedId: sequence<...> - template - CK_TILE_HOST_DEVICE constexpr void operator()(F f, OrderedId) const + template + CK_TILE_HOST_DEVICE constexpr void operator()(F f) const { - // retrive unordered Id - f(OrderedId::reorder_old_to_new(Orders{})); + if constexpr(sizeof...(LinearIds) > 0) + { + (f(typename Decomposer::template decompose_reordered{}), ...); + } } }; } // namespace detail -// Lengths is sequence<...>, it is the length of each dimension for -// N-dimensional loop -// Orders is sequence<...>, it is the order of dimension in which static_ford -// will loop over each -// dimension +// Compile-time N-dimensional loop with static multi-indices. +// Uses direct fold expansion with index decomposition, producing zero +// intermediate lambda closures. Each iteration calls f with a compile-time +// sequence containing the multi-dimensional index. template ::type> struct static_ford { + static constexpr index_t n_dim = Lengths::size(); + static constexpr index_t total_size = + reduce_on_sequence(Lengths{}, multiplies<>{}, number<1>{}); + + static constexpr bool is_identity_order = std::is_same_v>; + + // For identity order, OrderedLengths == Lengths (no reorder needed). + // For non-identity, reorder lengths according to iteration order. + // Both branches must be valid types, but only the active one is used. + using OrderedLengths = + std::conditional_t>; + using Decomposer = detail::index_decomposer>; + CK_TILE_HOST_DEVICE constexpr static_ford() { static_assert(Lengths::size() > 0, "wrong! Lengths is empty"); static_assert(Lengths::size() == Orders::size(), "wrong! inconsistent size"); } - // F signature: F(sequence<...> multi_id) - // multi_id is the unordered multi-index template CK_TILE_HOST_DEVICE constexpr void operator()(F f) const { - constexpr auto ordered_lengths = Lengths::reorder_new_to_old(Orders{}); - detail::static_ford_impl{}(f, sequence<>{}); + if constexpr(is_identity_order) + { + detail::ford_applier>{}(f); + } + else + { + detail::ford_applier_reordered>{}( + f); + } } }; diff --git a/test/ck_tile/core/container/unit_sequence.cpp b/test/ck_tile/core/container/unit_sequence.cpp index 3769d6ecf9..2ce0d0f7e8 100644 --- a/test/ck_tile/core/container/unit_sequence.cpp +++ b/test/ck_tile/core/container/unit_sequence.cpp @@ -355,6 +355,102 @@ TEST(SequenceSort, SortSingleElement) EXPECT_TRUE((std::is_same::value)); } +// Test sequence_sort sorted2unsorted_map (index tracking) +TEST(SequenceSort, SortedMapUnsorted) +{ + using Seq = sequence<5, 2, 8, 1, 9>; + using Sort = sequence_sort>; + using Map = typename Sort::sorted2unsorted_map; + // sorted = <1,2,5,8,9>, original indices = <3,1,0,2,4> + using Expected = sequence<3, 1, 0, 2, 4>; + EXPECT_TRUE((std::is_same::value)); +} + +TEST(SequenceSort, SortedMapAlreadySorted) +{ + using Seq = sequence<1, 2, 3, 4, 5>; + using Sort = sequence_sort>; + using Map = typename Sort::sorted2unsorted_map; + // Already sorted: map should be identity + using Expected = sequence<0, 1, 2, 3, 4>; + EXPECT_TRUE((std::is_same::value)); +} + +TEST(SequenceSort, SortedMapRoundTrip) +{ + // Verify: sorted_values[i] == original[sorted2unsorted_map[i]] + using Seq = sequence<5, 2, 8, 1, 9>; + using Sort = sequence_sort>; + // sorted = <1,2,5,8,9>, map = <3,1,0,2,4> + EXPECT_EQ(Seq::at(Sort::sorted2unsorted_map::at(0)), Sort::type::at(0)); + EXPECT_EQ(Seq::at(Sort::sorted2unsorted_map::at(1)), Sort::type::at(1)); + EXPECT_EQ(Seq::at(Sort::sorted2unsorted_map::at(2)), Sort::type::at(2)); + EXPECT_EQ(Seq::at(Sort::sorted2unsorted_map::at(3)), Sort::type::at(3)); + EXPECT_EQ(Seq::at(Sort::sorted2unsorted_map::at(4)), Sort::type::at(4)); +} + +TEST(SequenceSort, SortedMapWithDuplicates) +{ + using Seq = sequence<3, 1, 3, 1>; + using Sort = sequence_sort>; + using Sorted = typename Sort::type; + using Map = typename Sort::sorted2unsorted_map; + // sorted = <1,1,3,3> + using ExpectedSorted = sequence<1, 1, 3, 3>; + EXPECT_TRUE((std::is_same::value)); + // Verify round-trip: original[map[i]] == sorted[i] for all i + // (don't assert specific index order for duplicates — sort stability may vary) + EXPECT_EQ(Seq::at(Map::at(0)), Sorted::at(0)); + EXPECT_EQ(Seq::at(Map::at(1)), Sorted::at(1)); + EXPECT_EQ(Seq::at(Map::at(2)), Sorted::at(2)); + EXPECT_EQ(Seq::at(Map::at(3)), Sorted::at(3)); +} + +TEST(SequenceSort, SortedMapReverseSorted) +{ + using Seq = sequence<5, 4, 3, 2, 1>; + using Sort = sequence_sort>; + using Sorted = typename Sort::type; + using Map = typename Sort::sorted2unsorted_map; + using ExpSorted = sequence<1, 2, 3, 4, 5>; + using ExpMap = sequence<4, 3, 2, 1, 0>; + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); +} + +TEST(SequenceSort, SortedMapEmpty) +{ + using Sort = sequence_sort, less>; + using Map = typename Sort::sorted2unsorted_map; + EXPECT_TRUE((std::is_same>::value)); +} + +TEST(SequenceSort, SortedMapSingleElement) +{ + using Sort = sequence_sort, less>; + using Map = typename Sort::sorted2unsorted_map; + EXPECT_TRUE((std::is_same>::value)); +} + +// Test sequence_unique_sort sorted2unsorted_map +TEST(SequenceUniqueSort, UniqueSortMap) +{ + using Seq = sequence<3, 1, 4, 1, 5, 9, 2, 6, 5>; + using Result = sequence_unique_sort, equal>; + using Map = typename Result::sorted2unsorted_map; + // sorted unique = <1,2,3,4,5,6,9> + // The map should reference the first occurrence of each unique value in the original + // Verify round-trip: for each i, original[map[i]] == sorted_unique[i] + using Values = typename Result::type; + EXPECT_EQ(Seq::at(Map::at(0)), Values::at(0)); // 1 + EXPECT_EQ(Seq::at(Map::at(1)), Values::at(1)); // 2 + EXPECT_EQ(Seq::at(Map::at(2)), Values::at(2)); // 3 + EXPECT_EQ(Seq::at(Map::at(3)), Values::at(3)); // 4 + EXPECT_EQ(Seq::at(Map::at(4)), Values::at(4)); // 5 + EXPECT_EQ(Seq::at(Map::at(5)), Values::at(5)); // 6 + EXPECT_EQ(Seq::at(Map::at(6)), Values::at(6)); // 9 +} + // Test sequence_unique_sort TEST(SequenceUniqueSort, UniqueSort) { @@ -405,6 +501,24 @@ TEST(SequenceMap, InvalidMapMissing) EXPECT_FALSE((is_valid_sequence_map::value)); } +TEST(SequenceMap, InvalidMapNegative) +{ + using Map = sequence<0, -1, 2>; + EXPECT_FALSE((is_valid_sequence_map::value)); +} + +TEST(SequenceMap, ValidMapSingleElement) +{ + EXPECT_TRUE((is_valid_sequence_map>::value)); +} + +TEST(SequenceMap, InvalidMapSingleElement) +{ + EXPECT_FALSE((is_valid_sequence_map>::value)); +} + +TEST(SequenceMap, ValidMapEmpty) { EXPECT_TRUE((is_valid_sequence_map>::value)); } + // Test sequence_map_inverse // Note: sequence_map_inverse inverts a mapping where Map[i] = j means old position i maps to new // position j The inverse gives us new position i came from old position inverse[i] diff --git a/test/ck_tile/utility/CMakeLists.txt b/test/ck_tile/utility/CMakeLists.txt index 42bdb26e1d..2a377139b8 100644 --- a/test/ck_tile/utility/CMakeLists.txt +++ b/test/ck_tile/utility/CMakeLists.txt @@ -5,6 +5,7 @@ message("-- Adding: test/ck_tile/utility/") add_gtest_executable(test_fill test_fill.cpp) add_gtest_executable(test_ck_tile_sequence test_sequence.cpp) +add_gtest_executable(test_ck_tile_static_ford test_static_ford.cpp) # Add print tests add_subdirectory(print) diff --git a/test/ck_tile/utility/test_static_ford.cpp b/test/ck_tile/utility/test_static_ford.cpp new file mode 100644 index 0000000000..7337471647 --- /dev/null +++ b/test/ck_tile/utility/test_static_ford.cpp @@ -0,0 +1,293 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include "ck_tile/core/container/sequence.hpp" +#include "ck_tile/core/utility/functional.hpp" + +using namespace ck_tile; + +// ============================================================================ +// static_ford Tests — Identity Order (default) +// ============================================================================ + +TEST(CkTileStaticFord, Identity2D) +{ + std::vector> visited; + + static_ford>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + visited.emplace_back(i, j); + }); + + ASSERT_EQ(visited.size(), 6u); + EXPECT_EQ(visited[0], std::make_pair(0, 0)); + EXPECT_EQ(visited[1], std::make_pair(0, 1)); + EXPECT_EQ(visited[2], std::make_pair(0, 2)); + EXPECT_EQ(visited[3], std::make_pair(1, 0)); + EXPECT_EQ(visited[4], std::make_pair(1, 1)); + EXPECT_EQ(visited[5], std::make_pair(1, 2)); +} + +TEST(CkTileStaticFord, Identity3D) +{ + std::vector> visited; + + static_ford>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + constexpr index_t k = multi_id[number<2>{}]; + visited.emplace_back(i, j, k); + }); + + ASSERT_EQ(visited.size(), 12u); + EXPECT_EQ(visited[0], std::make_tuple(0, 0, 0)); + EXPECT_EQ(visited[1], std::make_tuple(0, 0, 1)); + EXPECT_EQ(visited[2], std::make_tuple(0, 1, 0)); + EXPECT_EQ(visited[3], std::make_tuple(0, 1, 1)); + EXPECT_EQ(visited[4], std::make_tuple(0, 2, 0)); + EXPECT_EQ(visited[5], std::make_tuple(0, 2, 1)); + EXPECT_EQ(visited[6], std::make_tuple(1, 0, 0)); + EXPECT_EQ(visited[7], std::make_tuple(1, 0, 1)); + EXPECT_EQ(visited[8], std::make_tuple(1, 1, 0)); + EXPECT_EQ(visited[9], std::make_tuple(1, 1, 1)); + EXPECT_EQ(visited[10], std::make_tuple(1, 2, 0)); + EXPECT_EQ(visited[11], std::make_tuple(1, 2, 1)); +} + +TEST(CkTileStaticFord, Identity1D) +{ + std::vector visited; + + static_ford>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + visited.push_back(i); + }); + + ASSERT_EQ(visited.size(), 5u); + for(index_t i = 0; i < 5; ++i) + { + EXPECT_EQ(visited[i], i); + } +} + +TEST(CkTileStaticFord, SingleElement1D) +{ + std::vector visited; + + static_ford>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + visited.push_back(i); + }); + + ASSERT_EQ(visited.size(), 1u); + EXPECT_EQ(visited[0], 0); +} + +TEST(CkTileStaticFord, SingleElement2D) +{ + std::vector> visited; + + static_ford>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + visited.emplace_back(i, j); + }); + + ASSERT_EQ(visited.size(), 1u); + EXPECT_EQ(visited[0], std::make_pair(0, 0)); +} + +TEST(CkTileStaticFord, IdentityWithUnitDim) +{ + std::vector> visited; + + static_ford>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + constexpr index_t k = multi_id[number<2>{}]; + visited.emplace_back(i, j, k); + }); + + ASSERT_EQ(visited.size(), 6u); + EXPECT_EQ(visited[0], std::make_tuple(0, 0, 0)); + EXPECT_EQ(visited[1], std::make_tuple(0, 0, 1)); + EXPECT_EQ(visited[2], std::make_tuple(0, 0, 2)); + EXPECT_EQ(visited[3], std::make_tuple(1, 0, 0)); + EXPECT_EQ(visited[4], std::make_tuple(1, 0, 1)); + EXPECT_EQ(visited[5], std::make_tuple(1, 0, 2)); +} + +// ============================================================================ +// static_ford Tests — Non-Identity Order (primary template with decompose_reordered) +// ============================================================================ + +TEST(CkTileStaticFord, ReversedOrder2D) +{ + std::vector> visited; + + // Order (1, 0): dim 1 is outer, dim 0 is inner (column-major) + static_ford, sequence<1, 0>>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + visited.emplace_back(i, j); + }); + + ASSERT_EQ(visited.size(), 6u); + EXPECT_EQ(visited[0], std::make_pair(0, 0)); + EXPECT_EQ(visited[1], std::make_pair(1, 0)); + EXPECT_EQ(visited[2], std::make_pair(0, 1)); + EXPECT_EQ(visited[3], std::make_pair(1, 1)); + EXPECT_EQ(visited[4], std::make_pair(0, 2)); + EXPECT_EQ(visited[5], std::make_pair(1, 2)); +} + +TEST(CkTileStaticFord, CustomOrder3D_201) +{ + std::vector> visited; + + // Orders<2,0,1>: dim 2 outermost, dim 0 middle, dim 1 innermost + static_ford, sequence<2, 0, 1>>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + constexpr index_t k = multi_id[number<2>{}]; + visited.emplace_back(i, j, k); + }); + + ASSERT_EQ(visited.size(), 24u); + // With orders (2,0,1): k varies slowest, then i, then j fastest + EXPECT_EQ(visited[0], std::make_tuple(0, 0, 0)); + EXPECT_EQ(visited[1], std::make_tuple(0, 1, 0)); + EXPECT_EQ(visited[2], std::make_tuple(0, 2, 0)); + EXPECT_EQ(visited[3], std::make_tuple(1, 0, 0)); + EXPECT_EQ(visited[4], std::make_tuple(1, 1, 0)); + EXPECT_EQ(visited[5], std::make_tuple(1, 2, 0)); + EXPECT_EQ(visited[6], std::make_tuple(0, 0, 1)); + EXPECT_EQ(visited[7], std::make_tuple(0, 1, 1)); + // Tail: last element should be (1, 2, 3) + EXPECT_EQ(visited[23], std::make_tuple(1, 2, 3)); +} + +TEST(CkTileStaticFord, CustomOrder3D_120) +{ + std::vector> visited; + + // Orders<1,2,0>: dim 1 outermost, dim 2 middle, dim 0 innermost + static_ford, sequence<1, 2, 0>>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + constexpr index_t k = multi_id[number<2>{}]; + visited.emplace_back(i, j, k); + }); + + ASSERT_EQ(visited.size(), 12u); + // With orders (1,2,0): j varies slowest, then k, then i fastest + EXPECT_EQ(visited[0], std::make_tuple(0, 0, 0)); + EXPECT_EQ(visited[1], std::make_tuple(1, 0, 0)); + EXPECT_EQ(visited[2], std::make_tuple(0, 0, 1)); + EXPECT_EQ(visited[3], std::make_tuple(1, 0, 1)); + EXPECT_EQ(visited[4], std::make_tuple(0, 1, 0)); + EXPECT_EQ(visited[5], std::make_tuple(1, 1, 0)); + // Tail: last element should be (1, 2, 1) + EXPECT_EQ(visited[11], std::make_tuple(1, 2, 1)); +} + +TEST(CkTileStaticFord, NonIdentityWithUnitDim) +{ + std::vector> visited; + + // Unit dim at position 1 with non-trivial order + static_ford, sequence<2, 0, 1>>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + constexpr index_t k = multi_id[number<2>{}]; + visited.emplace_back(i, j, k); + }); + + ASSERT_EQ(visited.size(), 6u); + // All entries must have j == 0 (unit dimension) + for(size_t idx = 0; idx < visited.size(); ++idx) + { + EXPECT_EQ(std::get<1>(visited[idx]), 0) << "Unit dim not zero at iteration " << idx; + } +} + +TEST(CkTileStaticFord, CustomOrder4D) +{ + std::vector> visited; + + // 4D with order <3,1,0,2> + static_ford, sequence<3, 1, 0, 2>>{}([&](auto multi_id) { + constexpr index_t a = multi_id[number<0>{}]; + constexpr index_t b = multi_id[number<1>{}]; + constexpr index_t c = multi_id[number<2>{}]; + constexpr index_t d = multi_id[number<3>{}]; + visited.emplace_back(a, b, c, d); + }); + + ASSERT_EQ(visited.size(), 48u); + // dim 3 (size 4) outermost, dim 1 (size 3) next, dim 0 (size 2) next, dim 2 (size 2) inner + EXPECT_EQ(visited[0], std::make_tuple(0, 0, 0, 0)); + EXPECT_EQ(visited[1], std::make_tuple(0, 0, 1, 0)); + EXPECT_EQ(visited[2], std::make_tuple(1, 0, 0, 0)); + EXPECT_EQ(visited[3], std::make_tuple(1, 0, 1, 0)); + EXPECT_EQ(visited[4], std::make_tuple(0, 1, 0, 0)); + EXPECT_EQ(visited[5], std::make_tuple(0, 1, 1, 0)); +} + +TEST(CkTileStaticFord, AsymmetricDimsWithOrder) +{ + std::vector> visited; + + // Asymmetric: 3x5 with reversed order + static_ford, sequence<1, 0>>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + visited.emplace_back(i, j); + }); + + ASSERT_EQ(visited.size(), 15u); + // dim 1 (size 5) outer, dim 0 (size 3) inner + EXPECT_EQ(visited[0], std::make_pair(0, 0)); + EXPECT_EQ(visited[1], std::make_pair(1, 0)); + EXPECT_EQ(visited[2], std::make_pair(2, 0)); + EXPECT_EQ(visited[3], std::make_pair(0, 1)); + EXPECT_EQ(visited[4], std::make_pair(1, 1)); + EXPECT_EQ(visited[5], std::make_pair(2, 1)); +} + +// ============================================================================ +// Consistency: identity order matches explicit identity order +// ============================================================================ + +TEST(CkTileStaticFord, IdentityOrderMatchesExplicit) +{ + std::vector> default_visited; + std::vector> explicit_visited; + + static_ford>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + default_visited.emplace_back(i, j); + }); + + static_ford, sequence<0, 1>>{}([&](auto multi_id) { + constexpr index_t i = multi_id[number<0>{}]; + constexpr index_t j = multi_id[number<1>{}]; + explicit_visited.emplace_back(i, j); + }); + + ASSERT_EQ(default_visited.size(), explicit_visited.size()); + for(size_t i = 0; i < default_visited.size(); ++i) + { + EXPECT_EQ(default_visited[i], explicit_visited[i]) << "Mismatch at iteration " << i; + } +} + +// index_decomposer and inverse_perm are implementation details tested +// indirectly through the static_ford behavioral tests above. +// The IdentityOrderMatchesExplicit test verifies both code paths +// (identity specialization and primary template) produce identical results.