mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
Hotfix LDS data hazard in fused attention (#360)
* avoid LDS data hazard in gemm_softmax_gemm pipeline
* trivial refactors
* comments
* shrink blockwise gemm v2 thread buffer size
* reclaim A block LDS space during 2nd gemm
* amend
* amend
This commit is contained in:
@@ -819,7 +819,7 @@ struct XdlopsGemm
|
||||
index_t n_offset = blk_i * mfma_instr.n_per_blk + blk_td;
|
||||
index_t m_offset = xdlops_i * mfma_instr.m_per_blk + blk_id * mfma_instr.group_size;
|
||||
|
||||
return CIndex{m_offset, n_offset};
|
||||
return TransposeC ? CIndex{n_offset, m_offset} : CIndex{m_offset, n_offset};
|
||||
}
|
||||
|
||||
static constexpr auto mfma = MfmaSelector<base_type, MPerXdlops, NPerXdlops>{};
|
||||
|
||||
Reference in New Issue
Block a user