mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
[CK_TILE] FMHA BWD Optimization For GFX950 (#2628)
* simplify fmha_bwd_kernel MakeKargs & dq_dram_window
* simply duplicate
* trload pipeline
* Try two-stage
* add prefetch
* optimize & iglp
This commit is contained in:
@@ -51,6 +51,12 @@ inline std::string get_device_name()
|
||||
default: return name;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool is_load_tr_supported()
|
||||
{
|
||||
// Check if load transpose is supported.
|
||||
return get_device_name() == "gfx950";
|
||||
}
|
||||
} // namespace ck_tile
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user