Move code lines in the HSTU pipeline

This commit is contained in:
Qianfeng Zhang
2025-07-07 09:49:16 +00:00
parent 8d30e46ba5
commit 6825618dca

View File

@@ -414,6 +414,8 @@ struct HstuAttentionFwdPipelineQRKSVS
pcomp_tile = cast_tile<CompDataType>(sacc_tile);
tile_elementwise_inout(f_silu, pcomp_tile);
if constexpr(std::is_same_v<VLayout, ck_tile::tensor_layout::gemm::RowMajor>)
{
auto v_shuffle_tmp = make_static_distributed_tensor<QKVDataType>(
@@ -436,10 +438,6 @@ struct HstuAttentionFwdPipelineQRKSVS
tile_elementwise_in(v_element_func, v_tile)); // store the prefetch
};
__builtin_amdgcn_sched_barrier(0);
tile_elementwise_inout(f_silu, pcomp_tile);
if constexpr(kHasDropout)
{
auto randval_lds_ptr = reinterpret_cast<char*>(smem_ptr) +