mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
Use new mfma instructions for FP8 on gfx950 (#2202)
* Add logic to use new mfma instructions for fp8 bf8 * Fix example_gemm_xdl_fp8_pk_i4_bpreshuffle_v3 on gfx950 and run clang format * Update include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp Co-authored-by: Andriy Roshchenko <107577548+andriy-ca@users.noreply.github.com> * Fix intrin_mfma f8 calls due to merge mistake --------- Co-authored-by: Andriy Roshchenko <107577548+andriy-ca@users.noreply.github.com>
This commit is contained in:
@@ -124,7 +124,6 @@ struct BlockwiseGemmXdlops_pipeline_bpreshuffle_bdequant_v3<BlockGemmPipelineSch
|
||||
using Base::I1;
|
||||
using Base::I2;
|
||||
using Base::KRepeat;
|
||||
using Base::xdlops_gemm;
|
||||
using typename Base::HotLoopInstList;
|
||||
|
||||
using Base::a_block_desc_m0_m1_m2_k;
|
||||
@@ -145,6 +144,9 @@ struct BlockwiseGemmXdlops_pipeline_bpreshuffle_bdequant_v3<BlockGemmPipelineSch
|
||||
|
||||
using Base::MWaves;
|
||||
|
||||
static constexpr auto xdlops_gemm =
|
||||
XdlopsGemm<ComputeDataType, MPerXDL, NPerXDL, KPack, BDataType>{};
|
||||
|
||||
static constexpr index_t PrefetchStages = 2;
|
||||
static constexpr index_t PrefillStages = 1;
|
||||
static constexpr index_t GlobalBufferNum = 1;
|
||||
|
||||
Reference in New Issue
Block a user