Merge commit 'f5c2f09036cdc22dc8944719215dd47003c50a24' into develop

This commit is contained in:
assistant-librarian[bot]
2026-01-24 00:38:47 +00:00
parent e2e058bcbc
commit 6a21c125a0
6 changed files with 115 additions and 16 deletions

View File

@@ -42,7 +42,8 @@ struct StreamKTilePartitionerBase
CK_TILE_HOST_DEVICE index_t get_partials_buffer_size(index_t acc_element_bytes) const noexcept;
/**
* @brief Calculates the total space needed for the flags buffer.
* @brief Calculates the total space needed for the flags buffer, rounded up to a multiple
* of 128 bytes.
*
* @return index_t The number of bytes needed for the flags buffer.
*/

View File

@@ -58,7 +58,10 @@ CK_TILE_HOST_DEVICE index_t
StreamKTilePartitionerBase<BlockGemmShapeType, ReductionStrategyType>::get_flags_buffer_size()
const noexcept
{
return sizeof(index_t) * sk_ctas_;
constexpr index_t alignment = 128;
const index_t required_bytes = sizeof(index_t) * sk_ctas_;
const index_t padded_bytes = ck_tile::integer_least_multiple(required_bytes, alignment);
return padded_bytes;
}
template <typename BlockGemmShapeType, StreamKReductionStrategy ReductionStrategyType>