Merge commit 'f161b5b738781c71bd5f2c191561b81f679ba9ed' into develop

This commit is contained in:
assistant-librarian[bot]
2025-09-23 23:11:18 +00:00
parent c39d5ca2c5
commit a55a7e37ec
40 changed files with 167 additions and 273 deletions

View File

@@ -598,8 +598,8 @@ struct FlatmmKernel
CK_TILE_DEVICE void operator()(KernelArgs kargs) const
{
const auto [iM, iN] = TilePartitioner{kargs.M, kargs.N}.GetOutputTileIndex(blockIdx.x);
const index_t i_m = amd_wave_read_first_lane(iM * TilePartitioner::MPerBlock);
const index_t i_n = amd_wave_read_first_lane(iN * TilePartitioner::NPerBlock);
const index_t i_m = __builtin_amdgcn_readfirstlane(iM * TilePartitioner::MPerBlock);
const index_t i_n = __builtin_amdgcn_readfirstlane(iN * TilePartitioner::NPerBlock);
const SplitKBatchOffset splitk_batch_offset(kargs);
// options