mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-19 22:39:03 +00:00
[CK_TILE][FMHA] Add new tile size for async (#3623)
* Revert "Revert "[CK_TILE][FMHA] Add new tile size for async (#3586)" (#3613)"
This reverts commit 8f75869408.
* Add new tile_size for async pipeline
Signed-off-by: Linjun-AMD <Jun.Lin@amd.com>
* Update include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs_async.hpp
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---------
Signed-off-by: Linjun-AMD <Jun.Lin@amd.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -329,6 +329,8 @@ struct BlockFmhaPipelineQRKSVSAsync
|
||||
{
|
||||
if(num_total_loop <= 0)
|
||||
{
|
||||
buffer_load_fence(0); // rocm-7.1.1, if whole tile is masked out, need to fence(0)
|
||||
// otherwise will have compute error(maybe compiler bug?)
|
||||
if constexpr(kStoreLSE)
|
||||
{
|
||||
auto lse =
|
||||
@@ -345,10 +347,8 @@ struct BlockFmhaPipelineQRKSVSAsync
|
||||
|
||||
store_tile(lse_dram_window_tmp, tile_elementwise_in(lse_element_func, lse));
|
||||
}
|
||||
buffer_load_fence(0); // rocm-6.1, if whole tile is masked out, need to fence(0)
|
||||
// otherwise will have compute error(maybe compiler bug?)
|
||||
|
||||
// Note: here occ are all cleard, return it
|
||||
// Note: here occ are all cleared, return it
|
||||
return o_acc;
|
||||
}
|
||||
__builtin_amdgcn_sched_barrier(0); // make sure sched_barrier(0) for this check
|
||||
|
||||
Reference in New Issue
Block a user