Merge commit 'cafaeb6b7bac4e18b0a5341cd14f54224292a0c9' into develop

This commit is contained in:
assistant-librarian[bot]
2025-10-29 15:12:59 +00:00
parent 83b2a1d876
commit 26e9ec020f
29 changed files with 1970 additions and 282 deletions

View File

@@ -25,7 +25,7 @@
* (3) number of iterations to cover the entire Y axis.
* Here, 'raked' describes how data is partitioned across different processing granularities.
* It represents howe we are going to access the data in thread, warp, or blocked in contiguous
* It represents how we are going to access the data in thread, warp, or blocked in contiguous
region.
* From below, the qualifier for 'raked' is the part of warp/thread hierarchy
* in the split of Y tile dimension where the iteration happens,
@@ -101,7 +101,7 @@ enum struct tile_distribution_pattern
* @brief Block raked pattern - aka linear.
*
*/
block_raked,
block_raked
};
struct tile_distribution_encoding_pattern
@@ -144,7 +144,6 @@ struct tile_distribution_encoding_pattern_2d<BlockSize,
NumWaveGroups>
: public tile_distribution_encoding_pattern
{
// TODO: make pattern where below condition does not need to hold - GGemmMultiDSplitk!
static_assert(XPerTile % VecSize == 0, "XPerTile must be a multiple of VecSize!");
static constexpr index_t warp_size = get_warp_size();