mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Add a workaround for a compiler issue for bwd on gfx90a and ROCm 7.1.1 (#3369)
Sometimes the compiler does not insert enough wait-states between v_mfma_f32... and v_accvgpr_read_b32 instructions when they are separated by an s_cbranch. The workaround is to move the accumulator values from AccVGPRs into VGPRs before branching.
This commit is contained in:
@@ -552,6 +552,15 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR
|
||||
});
|
||||
});
|
||||
}
|
||||
#if defined(__gfx9__)
|
||||
else
|
||||
{
|
||||
// Workaround for a compiler issue: sometimes there are not enough wait-states
|
||||
// between v_mfma_f32... and v_accvgpr_read_b32 instructions if they are separated
|
||||
// by s_cbranch.
|
||||
tile_elementwise_inout([](auto& x) { asm("; force move to %0" : "+v"(x)); }, s_acc);
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
bool need_perpixel_check = mask.IsEdgeTile(
|
||||
|
||||
Reference in New Issue
Block a user