Merge commit 'c254f3d7b4cccae5c884b419842a01eec4ed74fc' into develop

This commit is contained in:
assistant-librarian[bot]
2025-09-10 00:33:03 +00:00
parent 62c8b1c1f6
commit b2b0389f76
41 changed files with 647 additions and 655 deletions

View File

@@ -127,7 +127,10 @@ struct FlatmmKernel
return dim3(TilePartitioner::GridSize(M, N), 1, KBatch);
}
// Host-side helper: returns the block dimensions for this kernel as a
// dim3 constructed from the single value kBlockSize.
CK_TILE_HOST static constexpr auto BlockSize()
{
    return dim3(kBlockSize);
}
// Host-side helper: returns the block dimensions for this kernel.
// Yields dim3(kBlockSize / 2) when is_wave32() reports true, and
// dim3(kBlockSize) otherwise.
// NOTE(review): presumably kBlockSize is expressed for a 64-wide wavefront,
// which is why it is halved on wave32 targets -- confirm against is_wave32().
CK_TILE_HOST static constexpr auto BlockSize()
{
    return dim3(is_wave32() ? kBlockSize / 2 : kBlockSize);
}
CK_TILE_HOST static constexpr KernelArgs
MakeKernelArgs(const FlatmmHostArgs<NumDTensor>& hostArgs)