Support for MFMA_16x16x128 for fp8/bf8 (#2125)

* Adding 16x16x128 support for gfx950

* Support for fp8 and bf8

* fix input arguments for MFMA scale instruction

* clang-formatted

* Fixes for lwpck-3145 (#2138)

* Fix LDS tile, CMake dependency, and default epilogue

* Fall back BTypeToUse to ADataType in WOQ cases

* reverting instance json file

* reverting instance json file

---------

Co-authored-by: Yi DING <yi.ding@amd.com>

[ROCm/composable_kernel commit: d107f3c3a5]
This commit is contained in:
Khushbu Agarwal
2025-04-28 18:19:50 -07:00
committed by GitHub
parent a75ab12f3a
commit 7795e976da
8 changed files with 143 additions and 10 deletions

View File

@@ -8,6 +8,10 @@ execute_process(
--list_blobs
RESULT_VARIABLE ret
)
# Append the Python script and the JSON config below to this directory's
# CMAKE_CONFIGURE_DEPENDS property, so that modifying either file makes the
# build system re-run the CMake configure step.
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
${CMAKE_CURRENT_LIST_DIR}/gemm_instance_builder.py
${CMAKE_CURRENT_LIST_DIR}/configs/instance_combination.json
)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "Fail to generate kernels via Python. ${ret}")
@@ -21,7 +25,9 @@ add_custom_command(
--working_path ${CMAKE_CURRENT_BINARY_DIR}
--json ${CMAKE_CURRENT_LIST_DIR}/configs/instance_combination.json
--gen_blobs
DEPENDS ${GEMM_CODEGEN_BLOBS}
DEPENDS ${CMAKE_CURRENT_LIST_DIR}/gemm_instance_builder.py
${CMAKE_CURRENT_BINARY_DIR}/gemm_instance_blobs.txt
${CMAKE_CURRENT_LIST_DIR}/configs/instance_combination.json
)
set(EXECUTABLE_GEMM_INSTANCE "tile_engine_gemm")

View File

@@ -27,7 +27,9 @@ LAYOUT_MAP = {'r' : 'ck_tile::tensor_layout::gemm::RowMajor',
DEFAULT_EPILOGUE = """
using GemmEpilogue = ck_tile::DefaultGemm2DEpilogue<
ck_tile::DefaultGemm2DEpilogueProblem<AccDataType,
ck_tile::DefaultGemm2DEpilogueProblem<ADataType,
BDataType,
AccDataType,
CDataType,
CLayout,
kPadM,