mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-24 23:05:54 +00:00
Merge commit 'f5c2f09036cdc22dc8944719215dd47003c50a24' into develop
This commit is contained in:
@@ -42,7 +42,8 @@ struct StreamKTilePartitionerBase
|
||||
CK_TILE_HOST_DEVICE index_t get_partials_buffer_size(index_t acc_element_bytes) const noexcept;
|
||||
|
||||
/**
|
||||
* @brief Calculates the total space needed for the flags buffer.
|
||||
* @brief Calculates the total space needed for the flags buffer, rounded up to the nearest
|
||||
* multiple of 128 bytes.
|
||||
*
|
||||
* @return index_t The number of bytes needed for the flags buffer.
|
||||
*/
|
||||
|
||||
@@ -58,7 +58,10 @@ CK_TILE_HOST_DEVICE index_t
|
||||
StreamKTilePartitionerBase<BlockGemmShapeType, ReductionStrategyType>::get_flags_buffer_size()
|
||||
const noexcept
|
||||
{
|
||||
return sizeof(index_t) * sk_ctas_;
|
||||
constexpr index_t alignment = 128;
|
||||
const index_t required_bytes = sizeof(index_t) * sk_ctas_;
|
||||
const index_t padded_bytes = ck_tile::integer_least_multiple(required_bytes, alignment);
|
||||
return padded_bytes;
|
||||
}
|
||||
|
||||
template <typename BlockGemmShapeType, StreamKReductionStrategy ReductionStrategyType>
|
||||
|
||||
Reference in New Issue
Block a user