From 57c8cb19d27daffdbd96bf16ea2f8e00021514f3 Mon Sep 17 00:00:00 2001 From: Max Podkorytov <4273004+tenpercent@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:32:18 -0600 Subject: [PATCH] Optimize sequence_merge using direct concatenation for small cases Replace linear recursive instantiation with direct pack expansion for 1-4 sequences, and binary tree reduction for larger cases. Before: O(N) depth for merging N sequences After: O(log N) depth with O(1) for up to 4 sequences This further reduces maximum nesting depth from 26 to 22 levels when combined with the previous sequence_gen optimization. Co-Authored-By: Claude --- include/ck/utility/sequence.hpp | 63 +++++++++++++++---- .../ck/utility/statically_indexed_array.hpp | 1 + 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/include/ck/utility/sequence.hpp b/include/ck/utility/sequence.hpp index 2bd4b19da9..18bb36d112 100644 --- a/include/ck/utility/sequence.hpp +++ b/include/ck/utility/sequence.hpp @@ -199,30 +199,71 @@ template using make_index_sequence = typename __make_integer_seq::seq_type; -// merge sequence -template -struct sequence_merge +// merge sequence - optimized to avoid recursive instantiation +namespace detail { + +// Helper to concatenate multiple sequences in one step using pack expansion +template +struct sequence_merge_impl; + +// Base case: single sequence +template +struct sequence_merge_impl> { - using type = typename sequence_merge::type>::type; + using type = Sequence; }; +// Two sequences: direct concatenation template -struct sequence_merge, Sequence> +struct sequence_merge_impl, Sequence> { using type = Sequence; }; -template -struct sequence_merge +// Three sequences: direct concatenation (avoids one level of recursion) +template +struct sequence_merge_impl, Sequence, Sequence> { - using type = Seq; + using type = Sequence; +}; + +// Four sequences: direct concatenation +template +struct sequence_merge_impl, Sequence, Sequence, Sequence> +{ + using type = 
Sequence; +}; + +// General case: binary tree reduction (O(log N) depth instead of O(N)) +template +struct sequence_merge_impl +{ + // Merge pairs first, then recurse + using left = typename sequence_merge_impl::type; + using right = typename sequence_merge_impl::type; + using type = typename sequence_merge_impl::type; +}; + +} // namespace detail + +template +struct sequence_merge +{ + using type = typename detail::sequence_merge_impl::type; +}; + +template <> +struct sequence_merge<> +{ + using type = Sequence<>; }; // generate sequence - optimized using __make_integer_seq to avoid recursive instantiation namespace detail { // Helper that applies functor F to indices and produces a Sequence -// __make_integer_seq produces sequence_gen_helper +// __make_integer_seq produces sequence_gen_helper template struct sequence_gen_helper { @@ -236,8 +277,8 @@ struct sequence_gen_helper template struct sequence_gen { - using type = typename __make_integer_seq:: - template apply; + using type = + typename __make_integer_seq::template apply; }; template diff --git a/include/ck/utility/statically_indexed_array.hpp b/include/ck/utility/statically_indexed_array.hpp index d0735a32f6..f3d73e84a7 100644 --- a/include/ck/utility/statically_indexed_array.hpp +++ b/include/ck/utility/statically_indexed_array.hpp @@ -20,6 +20,7 @@ struct tuple_concat, Tuple> using type = Tuple; }; +// StaticallyIndexedArrayImpl uses binary split for O(log N) depth template struct StaticallyIndexedArrayImpl {