[CK_TILE][FMHA] Enable dwordx4 loading in async_load_tile_raw() (#2549)

* Support async load dwordx4

* Enlarge load size on gfx950
This commit is contained in:
Po Yen Chen
2025-08-22 10:13:47 +08:00
committed by GitHub
parent 4cfa2c7158
commit 4a7ecce096
3 changed files with 103 additions and 51 deletions

View File

@@ -364,7 +364,13 @@ struct BlockFmhaPipelineQXKSVSCustomPolicy : BlockFmhaPipelineQXCustomPolicy<QLo
using KDataType = remove_cvref_t<typename Problem::KDataType>;
if constexpr(AsyncCopy)
{
return 4 / sizeof(KDataType);
#if defined(__gfx950__)
constexpr index_t MaxLoadSizeInBytes = 4 * 4; // dwordx4
#else
constexpr index_t MaxLoadSizeInBytes = 4; // dword
#endif
return MaxLoadSizeInBytes / sizeof(KDataType);
}
else
{