Merge branch 'ck_tile/refactor' into ck_tile/elementwise

This commit is contained in:
rocking
2024-04-09 19:45:43 +08:00
committed by GitHub
7 changed files with 148 additions and 76 deletions

View File

@@ -280,9 +280,8 @@ struct SimplifiedGenericAttentionMask
}
else
{
// no need to do min/max here, since i_x will never be < 0 or >= x_total
index_t x_start = -y + i_y + 1; // this could be negative, but it's fine
index_t x_end = i_y + x; // this could be larger than x_total, but it's fine
index_t x_start = -y + i_y + 1; // this could be negative, but it's fine
index_t x_end = min(i_y + x, x_total); // need min in case x is padded
return i_x < x_start || i_x >= x_end;
}

View File

@@ -262,11 +262,13 @@ struct BlockFmhaPipelineQRKSVSAsync
store_tile(lse_dram_window_tmp, tile_elementwise_in(lse_element_func, lse));
}
buffer_load_fence(0); // rocm-6.1, if whole tile is masked out, need to fence(0)
// otherwise will have compute error(maybe compiler bug?)
// Note: here occ are all cleard, return it
// Note: q loaded but no fence, ignore it.
return o_acc;
}
__builtin_amdgcn_sched_barrier(0); // make sure sched_barrier(0) for this check
}
auto k_dram_block_window =
@@ -397,6 +399,7 @@ struct BlockFmhaPipelineQRKSVSAsync
k_origin.at(number<0>{}),
number<kM0>{},
number<kN0>{});
if(need_perpixel_check)
{
set_tile_if(