Merge commit '0b13697a88e77a733d36b14353df1c0a7ae756df' into develop

This commit is contained in:
assistant-librarian[bot]
2026-01-22 08:17:11 +00:00
parent 45e2275fc4
commit 8f10da355a
2 changed files with 10 additions and 4 deletions

View File

@@ -329,6 +329,8 @@ struct BlockFmhaPipelineQRKSVSAsync
{
if(num_total_loop <= 0)
{
buffer_load_fence(0); // rocm-7.1.1, if whole tile is masked out, need to fence(0)
// otherwise the computed result is wrong (possibly a compiler bug)
if constexpr(kStoreLSE)
{
auto lse =
@@ -345,10 +347,8 @@ struct BlockFmhaPipelineQRKSVSAsync
store_tile(lse_dram_window_tmp, tile_elementwise_in(lse_element_func, lse));
}
buffer_load_fence(0); // rocm-6.1, if whole tile is masked out, need to fence(0)
// otherwise will have compute error(maybe compiler bug?)
// Note: here occ are all cleard, return it
// Note: here occ are all cleared, return it
return o_acc;
}
__builtin_amdgcn_sched_barrier(0); // make sure sched_barrier(0) for this check