Remove unused code.

This commit is contained in:
Ville Pietilä
2025-10-07 13:28:32 +00:00
parent faf07cc3ab
commit 3c50e984c9
2 changed files with 1 addition and 83 deletions

View File

@@ -135,88 +135,6 @@ struct tile_distribution_encoding_pattern_2d : public tile_distribution_encoding
{
};
// Sparse rows
//
// Partial specialization of tile_distribution_encoding_pattern_2d selected by
// tile_distribution_pattern::sparse_row. It derives, entirely at compile time,
// a factorization of the Y extent into <Y0, Y1, Y2> and of the X extent into
// <X0, X1>, and exposes two factory functions that build a static tile
// distribution from those factors (one in Y-major order, one with X and Y
// swapped).
//
// Template parameters:
//   BlockSize     - divided by the warp size to obtain the warp count
//                   (presumably the number of threads per block - TODO confirm).
//   YPerTile      - tile extent along Y (rows).
//   XPerTile      - tile extent along X; must be a multiple of VecSize.
//   VecSize       - requested vector length along X; may be reduced, see X1.
//   NumWaveGroups - must be 1 for this pattern.
template <index_t BlockSize,
index_t YPerTile,
index_t XPerTile,
index_t VecSize,
index_t NumWaveGroups>
struct tile_distribution_encoding_pattern_2d<BlockSize,
YPerTile,
XPerTile,
VecSize,
tile_distribution_pattern::sparse_row,
NumWaveGroups>
: public tile_distribution_encoding_pattern
{
// Preconditions on the template arguments.
static_assert(XPerTile % VecSize == 0, "XPerTile must be a multiple of VecSize!");
static_assert(NumWaveGroups == 1, "NumWaveGroups must be 1 for sparse row pattern!");
static constexpr index_t warp_size = get_warp_size();
// Clamped to at least 1 so a BlockSize smaller than one warp still yields a usable count.
static constexpr index_t num_warps = max(1, BlockSize / warp_size);
// Calculate optimal vector size
// LargestVec: tile elements divided by the total thread count (num_warps * warp_size),
// clamped to at least 1.
static constexpr index_t LargestVec = max(1, (XPerTile * YPerTile) / (num_warps * warp_size));
// X1: the smaller of the requested VecSize and LargestVec.
static constexpr index_t X1 = VecSize > LargestVec ? LargestVec : VecSize;
// X0: number of X1-sized chunks per row (integer division).
// NOTE(review): only XPerTile % VecSize == 0 is asserted above; when X1 == LargestVec
// nothing here checks that X1 divides XPerTile - confirm X0 * X1 == XPerTile is
// guaranteed by the callers.
static constexpr index_t X0 = XPerTile / X1;
// When YPerTile is small, prioritize X dimension distribution
// and handle Y dimension with minimal thread usage.
// Calculate threads needed for one row.
static constexpr index_t threads_per_row = X0;
// Calculate how many rows we can process in parallel with available threads
// NOTE(review): if threads_per_row exceeds warp_size the quotient is 0, which makes Y2
// zero and the divisions by Y2 below ill-formed before the Y2 > 0 assertion is
// reached - confirm this case cannot occur.
static constexpr index_t max_parallel_rows = min(YPerTile, warp_size / threads_per_row);
// Y2: Number of rows each warp handles in one iteration
static constexpr index_t Y2 = max_parallel_rows;
// Y0: Number of warps to use (may be less than total available)
// warps_needed is ceil(YPerTile / Y2); Y0 caps it at num_warps.
static constexpr index_t warps_needed = (YPerTile + Y2 - 1) / Y2;
static constexpr index_t Y0 = min(warps_needed, num_warps);
// Y1: Number of iterations needed to cover all rows, i.e. ceil(YPerTile / (Y0 * Y2)).
static constexpr index_t Y1 = (YPerTile + (Y0 * Y2) - 1) / (Y0 * Y2);
// Validation: every factor must be strictly positive.
static_assert(Y0 > 0, "Y0 must be greater than 0!");
static_assert(Y1 > 0, "Y1 must be greater than 0!");
static_assert(Y2 > 0, "Y2 must be greater than 0!");
static_assert(X0 > 0, "X0 must be greater than 0!");
static_assert(X1 > 0, "X1 must be greater than 0!");
// Ensure we don't exceed available threads per warp
static_assert(threads_per_row * Y2 <= warp_size,
"Threads per row * rows per warp must not exceed warp size!");
// Ensure we cover all elements (may over-cover due to ceiling, but that's OK)
static_assert(Y0 * Y1 * Y2 >= YPerTile,
"Y0 * Y1 * Y2 must cover at least YPerTile rows");
// Builds the static tile distribution with the Y factors <Y0, Y1, Y2> as the first
// hidden dimension and the X factors <X0, X1> as the second. Per the inline
// annotations, the two P-groups select <Y0> and <Y2, X0>, and the Y-group selects
// <Y1, X1>.
CK_TILE_HOST_DEVICE static constexpr auto make_2d_static_tile_distribution()
{
return make_static_tile_distribution(
tile_distribution_encoding<sequence<1>,
tuple<sequence<Y0, Y1, Y2>, sequence<X0, X1>>,
tuple<sequence<1>, sequence<1, 2>>,
tuple<sequence<0>, sequence<2, 0>>, // -> <Y0>, <Y2, X0>
sequence<1, 2>,
sequence<1, 1>>{}); // -> <Y1, X1>
}
// Same factors as make_2d_static_tile_distribution(), but with the hidden dimensions
// swapped: <X0, X1> comes first and <Y0, Y1, Y2> second. The major indices are
// adjusted accordingly so the P-groups still select <Y0> and <Y2, X0>, while the
// Y-group now selects <X1, Y1>.
CK_TILE_HOST_DEVICE static constexpr auto make_shuffled_2d_static_tile_distribution()
{
return make_static_tile_distribution(
tile_distribution_encoding<sequence<1>,
tuple<sequence<X0, X1>, sequence<Y0, Y1, Y2>>,
tuple<sequence<2>, sequence<2, 1>>,
tuple<sequence<0>, sequence<2, 0>>, // -> <Y0>, <Y2, X0>
sequence<1, 2>,
sequence<1, 1>>{}); // -> <X1, Y1>
}
};
// Thread raked
template <index_t BlockSize,
index_t YPerTile,

View File

@@ -96,7 +96,7 @@ struct buffer_view<address_space_enum::generic,
std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
bool>::type = false>
CK_TILE_HOST_DEVICE constexpr auto get(index_t i,
CK_TILE_DEVICE constexpr auto get(index_t i,
index_t linear_offset,
bool is_valid_element,
bool_constant<oob_conditional_check> = {}) const