From 2136eddf8ad54ae235a076e021d03f3e6d2865c1 Mon Sep 17 00:00:00 2001 From: Anton Gorenko Date: Fri, 31 Oct 2025 21:44:52 +0500 Subject: [PATCH] Fix synchronization issue in fwd qr pipeline with dropout (#3135) BlockFmhaPipelineQRKSVS reuses the same LDS region for K and dropout, so there must be a block_sync_lds() between gemm_0 reading from k_lds_window and dropout storing randval. [ROCm/composable_kernel commit: e9596228ff7f6ddb68fbd2f0f9e964cfb6af61cf] --- .../ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs.hpp b/include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs.hpp index 9e9cce5400..0836fbfce3 100644 --- a/include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs.hpp +++ b/include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qr_ks_vs.hpp @@ -578,6 +578,9 @@ struct BlockFmhaPipelineQRKSVS if constexpr(kHasDropout) { + // K and dropout use the same address in LDS, finish loading from k_lds_window by + // gemm_0 to reuse LDS. + block_sync_lds(); dropout.template Run( smem_ptr, seqlen_k_start + i_total_loops * kN0, p_compute, randval_dram_window); }