From 3c50e984c9d60d93e44bcb0ef2cfd9ee24466f85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Pietil=C3=A4?= Date: Tue, 7 Oct 2025 13:28:32 +0000 Subject: [PATCH] Remove unused code. --- .../algorithm/static_encoding_pattern.hpp | 82 ------------------- include/ck_tile/core/tensor/buffer_view.hpp | 2 +- 2 files changed, 1 insertion(+), 83 deletions(-) diff --git a/include/ck_tile/core/algorithm/static_encoding_pattern.hpp b/include/ck_tile/core/algorithm/static_encoding_pattern.hpp index 775422fe06..c18a6fbb58 100644 --- a/include/ck_tile/core/algorithm/static_encoding_pattern.hpp +++ b/include/ck_tile/core/algorithm/static_encoding_pattern.hpp @@ -135,88 +135,6 @@ struct tile_distribution_encoding_pattern_2d : public tile_distribution_encoding { }; -// Sparse rows -template -struct tile_distribution_encoding_pattern_2d - : public tile_distribution_encoding_pattern -{ - static_assert(XPerTile % VecSize == 0, "XPerTile must be a multiple of VecSize!"); - static_assert(NumWaveGroups == 1, "NumWaveGroups must be 1 for sparse row pattern!"); - - static constexpr index_t warp_size = get_warp_size(); - static constexpr index_t num_warps = max(1, BlockSize / warp_size); - - // Calculate optimal vector size - static constexpr index_t LargestVec = max(1, (XPerTile * YPerTile) / (num_warps * warp_size)); - static constexpr index_t X1 = VecSize > LargestVec ? LargestVec : VecSize; - static constexpr index_t X0 = XPerTile / X1; - - // When YPerTile is small, prioritize X dimension distribution - // and handle Y dimension with minimal thread usage. - - // Calculate threads needed for one row. - static constexpr index_t threads_per_row = X0; - - // Calculate how many rows we can process in parallel with available threads - static constexpr index_t max_parallel_rows = min(YPerTile, warp_size / threads_per_row); - - // Y2: Number of rows each warp handles in one iteration - static constexpr index_t Y2 = max_parallel_rows; - - // Y0: Number of warps to use (may be less than total available) - static constexpr index_t warps_needed = (YPerTile + Y2 - 1) / Y2; - static constexpr index_t Y0 = min(warps_needed, num_warps); - - // Y1: Number of iterations needed to cover all rows - static constexpr index_t Y1 = (YPerTile + (Y0 * Y2) - 1) / (Y0 * Y2); - - // Validation - static_assert(Y0 > 0, "Y0 must be greater than 0!"); - static_assert(Y1 > 0, "Y1 must be greater than 0!"); - static_assert(Y2 > 0, "Y2 must be greater than 0!"); - static_assert(X0 > 0, "X0 must be greater than 0!"); - static_assert(X1 > 0, "X1 must be greater than 0!"); - - // Ensure we don't exceed available threads per warp - static_assert(threads_per_row * Y2 <= warp_size, - "Threads per row * rows per warp must not exceed warp size!"); - - // Ensure we cover all elements (may over-cover due to ceiling, but that's OK) - static_assert(Y0 * Y1 * Y2 >= YPerTile, - "Y0 * Y1 * Y2 must cover at least YPerTile rows"); - - CK_TILE_HOST_DEVICE static constexpr auto make_2d_static_tile_distribution() - { - return make_static_tile_distribution( - tile_distribution_encoding, - tuple, sequence>, - tuple, sequence<1, 2>>, - tuple, sequence<2, 0>>, // -> , - sequence<1, 2>, - sequence<1, 1>>{}); // -> - } - - CK_TILE_HOST_DEVICE static constexpr auto make_shuffled_2d_static_tile_distribution() - { - return make_static_tile_distribution( - tile_distribution_encoding, - tuple, sequence>, - tuple, sequence<2, 1>>, - tuple, sequence<2, 0>>, // -> , - sequence<1, 2>, - sequence<1, 1>>{}); // -> - } -}; - // Thread raked template >::scalar_type, typename vector_traits>::scalar_type>::value, bool>::type = false> - CK_TILE_HOST_DEVICE constexpr auto get(index_t i, + CK_TILE_DEVICE constexpr auto get(index_t i, index_t linear_offset, bool is_valid_element, bool_constant = {}) const