mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-07-01 04:07:56 +00:00
Remove unused code.
This commit is contained in:
@@ -135,88 +135,6 @@ struct tile_distribution_encoding_pattern_2d : public tile_distribution_encoding
|
||||
{
|
||||
};
|
||||
|
||||
// Sparse rows
|
||||
template <index_t BlockSize,
|
||||
index_t YPerTile,
|
||||
index_t XPerTile,
|
||||
index_t VecSize,
|
||||
index_t NumWaveGroups>
|
||||
struct tile_distribution_encoding_pattern_2d<BlockSize,
|
||||
YPerTile,
|
||||
XPerTile,
|
||||
VecSize,
|
||||
tile_distribution_pattern::sparse_row,
|
||||
NumWaveGroups>
|
||||
: public tile_distribution_encoding_pattern
|
||||
{
|
||||
static_assert(XPerTile % VecSize == 0, "XPerTile must be a multiple of VecSize!");
|
||||
static_assert(NumWaveGroups == 1, "NumWaveGroups must be 1 for sparse row pattern!");
|
||||
|
||||
static constexpr index_t warp_size = get_warp_size();
|
||||
static constexpr index_t num_warps = max(1, BlockSize / warp_size);
|
||||
|
||||
// Calculate optimal vector size
|
||||
static constexpr index_t LargestVec = max(1, (XPerTile * YPerTile) / (num_warps * warp_size));
|
||||
static constexpr index_t X1 = VecSize > LargestVec ? LargestVec : VecSize;
|
||||
static constexpr index_t X0 = XPerTile / X1;
|
||||
|
||||
// When YPerTile is small, prioritize X dimension distribution
|
||||
// and handle Y dimension with minimal thread usage.
|
||||
|
||||
// Calculate threads needed for one row.
|
||||
static constexpr index_t threads_per_row = X0;
|
||||
|
||||
// Calculate how many rows we can process in parallel with available threads
|
||||
static constexpr index_t max_parallel_rows = min(YPerTile, warp_size / threads_per_row);
|
||||
|
||||
// Y2: Number of rows each warp handles in one iteration
|
||||
static constexpr index_t Y2 = max_parallel_rows;
|
||||
|
||||
// Y0: Number of warps to use (may be less than total available)
|
||||
static constexpr index_t warps_needed = (YPerTile + Y2 - 1) / Y2;
|
||||
static constexpr index_t Y0 = min(warps_needed, num_warps);
|
||||
|
||||
// Y1: Number of iterations needed to cover all rows
|
||||
static constexpr index_t Y1 = (YPerTile + (Y0 * Y2) - 1) / (Y0 * Y2);
|
||||
|
||||
// Validation
|
||||
static_assert(Y0 > 0, "Y0 must be greater than 0!");
|
||||
static_assert(Y1 > 0, "Y1 must be greater than 0!");
|
||||
static_assert(Y2 > 0, "Y2 must be greater than 0!");
|
||||
static_assert(X0 > 0, "X0 must be greater than 0!");
|
||||
static_assert(X1 > 0, "X1 must be greater than 0!");
|
||||
|
||||
// Ensure we don't exceed available threads per warp
|
||||
static_assert(threads_per_row * Y2 <= warp_size,
|
||||
"Threads per row * rows per warp must not exceed warp size!");
|
||||
|
||||
// Ensure we cover all elements (may over-cover due to ceiling, but that's OK)
|
||||
static_assert(Y0 * Y1 * Y2 >= YPerTile,
|
||||
"Y0 * Y1 * Y2 must cover at least YPerTile rows");
|
||||
|
||||
CK_TILE_HOST_DEVICE static constexpr auto make_2d_static_tile_distribution()
|
||||
{
|
||||
return make_static_tile_distribution(
|
||||
tile_distribution_encoding<sequence<1>,
|
||||
tuple<sequence<Y0, Y1, Y2>, sequence<X0, X1>>,
|
||||
tuple<sequence<1>, sequence<1, 2>>,
|
||||
tuple<sequence<0>, sequence<2, 0>>, // -> <Y0>, <Y2, X0>
|
||||
sequence<1, 2>,
|
||||
sequence<1, 1>>{}); // -> <Y1, X1>
|
||||
}
|
||||
|
||||
CK_TILE_HOST_DEVICE static constexpr auto make_shuffled_2d_static_tile_distribution()
|
||||
{
|
||||
return make_static_tile_distribution(
|
||||
tile_distribution_encoding<sequence<1>,
|
||||
tuple<sequence<X0, X1>, sequence<Y0, Y1, Y2>>,
|
||||
tuple<sequence<2>, sequence<2, 1>>,
|
||||
tuple<sequence<0>, sequence<2, 0>>, // -> <Y0>, <Y2, X0>
|
||||
sequence<1, 2>,
|
||||
sequence<1, 1>>{}); // -> <X1, Y1>
|
||||
}
|
||||
};
|
||||
|
||||
// Thread raked
|
||||
template <index_t BlockSize,
|
||||
index_t YPerTile,
|
||||
|
||||
@@ -96,7 +96,7 @@ struct buffer_view<address_space_enum::generic,
|
||||
std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
|
||||
typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
|
||||
bool>::type = false>
|
||||
CK_TILE_HOST_DEVICE constexpr auto get(index_t i,
|
||||
CK_TILE_DEVICE constexpr auto get(index_t i,
|
||||
index_t linear_offset,
|
||||
bool is_valid_element,
|
||||
bool_constant<oob_conditional_check> = {}) const
|
||||
|
||||
Reference in New Issue
Block a user