Hotfix LDS data hazard in fused attention (#360)

* avoid LDS data hazard in gemm_softmax_gemm pipeline

* trivial refactors

* comments

* shrink blockwise gemm v2 thread buffer size

* reclaim A block LDS space during 2nd gemm

* amend

* amend
This commit is contained in:
Anthony Chang
2022-08-16 01:04:20 +08:00
committed by GitHub
parent 53ea4713af
commit c961ce9226
4 changed files with 88 additions and 69 deletions

View File

@@ -819,7 +819,7 @@ struct XdlopsGemm
index_t n_offset = blk_i * mfma_instr.n_per_blk + blk_td;
index_t m_offset = xdlops_i * mfma_instr.m_per_blk + blk_id * mfma_instr.group_size;
return CIndex{m_offset, n_offset};
return TransposeC ? CIndex{n_offset, m_offset} : CIndex{m_offset, n_offset};
}
static constexpr auto mfma = MfmaSelector<base_type, MPerXdlops, NPerXdlops>{};