mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 12:41:26 +00:00
Fix synchronization issue in fwd qr pipeline with dropout (#3135)
BlockFmhaPipelineQRKSVS reuses the same LDS address for K and dropout, so there must be a block_sync_lds between gemm_0 loading from k_lds_window and dropout storing its randval.
This commit is contained in:
@@ -578,6 +578,9 @@ struct BlockFmhaPipelineQRKSVS
|
||||
|
||||
if constexpr(kHasDropout)
|
||||
{
|
||||
// K and dropout use the same address in LDS, finish loading from k_lds_window by
|
||||
// gemm_0 to reuse LDS.
|
||||
block_sync_lds();
|
||||
dropout.template Run<decltype(gemm_0), SMPLComputeDataType, RandValOutputDataType>(
|
||||
smem_ptr, seqlen_k_start + i_total_loops * kN0, p_compute, randval_dram_window);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user