diff --git a/include/ck_tile/ops/fmha/pipeline/block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp b/include/ck_tile/ops/fmha/pipeline/block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp index 854e45c432..7cc424597a 100644 --- a/include/ck_tile/ops/fmha/pipeline/block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp +++ b/include/ck_tile/ops/fmha/pipeline/block_fmha_bwd_dq_dk_dv_pipeline_kr_ktr_vr.hpp @@ -552,6 +552,15 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR }); }); } +#if defined(__gfx9__) + else + { + // Workaround for a compiler issue: sometimes there are not enough wait-states + // between v_mfma_f32... and v_accvgpr_read_b32 instructions if they are separated + // by s_cbranch. + tile_elementwise_inout([](auto& x) { asm("; force move to %0" : "+v"(x)); }, s_acc); + } +#endif { bool need_perpixel_check = mask.IsEdgeTile(