Fix synchronization issue in fwd qr pipeline with dropout (#3135)

BlockFmhaPipelineQRKSVS reuses the same LDS region for K and dropout, so
there must be a block_sync_lds() between gemm_0 loading from k_lds_window
and dropout storing its randval.
This commit is contained in:
Anton Gorenko
2025-10-31 21:44:52 +05:00
committed by GitHub
parent 5ed2046bee
commit e9596228ff

View File

@@ -578,6 +578,9 @@ struct BlockFmhaPipelineQRKSVS
if constexpr(kHasDropout)
{
// K and dropout use the same address in LDS. gemm_0 loads K through k_lds_window,
// so synchronize here to make sure those loads have finished before dropout
// stores its random values into the reused LDS.
block_sync_lds();
dropout.template Run<decltype(gemm_0), SMPLComputeDataType, RandValOutputDataType>(
smem_ptr, seqlen_k_start + i_total_loops * kN0, p_compute, randval_dram_window);
}