Mirror of https://github.com/ROCm/composable_kernel.git (synced 2026-04-20 06:49:15 +00:00)
[CK_TILE][FMHA] Enable dwordx4 loading in async_load_tile_raw() (#2549)
* Support async load dwordx4
* Enlarge load size on gfx950
This commit is contained in:
@@ -364,7 +364,13 @@ struct BlockFmhaPipelineQXKSVSCustomPolicy : BlockFmhaPipelineQXCustomPolicy<QLo
|
||||
using KDataType = remove_cvref_t<typename Problem::KDataType>;
|
||||
if constexpr(AsyncCopy)
|
||||
{
|
||||
return 4 / sizeof(KDataType);
|
||||
#if defined(__gfx950__)
|
||||
constexpr index_t MaxLoadSizeInBytes = 4 * 4; // dwordx4
|
||||
#else
|
||||
constexpr index_t MaxLoadSizeInBytes = 4; // dword
|
||||
#endif
|
||||
|
||||
return MaxLoadSizeInBytes / sizeof(KDataType);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user