From 564276eff982cca596e76ebb16cf271cca10c3f6 Mon Sep 17 00:00:00 2001 From: "assistant-librarian[bot]" Date: Mon, 8 Dec 2025 17:14:48 +0000 Subject: [PATCH] Merge commit 'ca6143f0b2237a1af80ef5550f1b774fd463676d' into develop --- .../block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/ck_tile/ops/fmha/pipeline/block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp b/include/ck_tile/ops/fmha/pipeline/block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp index 854e45c432..7cc424597a 100644 --- a/include/ck_tile/ops/fmha/pipeline/block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp +++ b/include/ck_tile/ops/fmha/pipeline/block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp @@ -552,6 +552,15 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR }); }); } +#if defined(__gfx9__) + else + { + // Workaround for a compiler issue: sometimes there are not enough wait-states + // between v_mfma_f32... and v_accvgpr_read_b32 instructions if they are separated + // by s_cbranch. + tile_elementwise_inout([](auto& x) { asm("; force move to %0" : "+v"(x)); }, s_acc); + } +#endif { bool need_perpixel_check = mask.IsEdgeTile(