mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-25 23:34:46 +00:00
[GEMM] gemm_universal related optimization (#1453)
* replace buffer_atomic with global_atomic
* fixed global_atomic_add
* added bf16 atomic_add
* format
* clang-format-12
* clean
* clean
* add guards
* Update gtest.cmake
* enabled splitk_gemm_multi_d
* format
* add ckProfiler
* format
* fixed naming
* format
* clean
* clean
* add guards
* fix clang format
* format
* add kbatch printout
* clean
* Add rocm6.2 related gemm optimization
* Limit bf16 atomic usage
* remove redundant RCR gemm_universal instance
* Add RRR fp8 gemm universal instance
* Bug fix
* Add GPU_TARGET guard to FP8/BF8 target
* bug fix
* update cmake
* remove all fp8/bf8 example if arch not support
* Enable fp8 RRR support in ckProfiler
* limit greedy-reverse flag to gemm_universal in ckProfiler
---------
Co-authored-by: Jing Zhang <jizhan@fb.com>
Co-authored-by: Jing Zhang <jizhan@meta.com>
Co-authored-by: zjing14 <zhangjing14@gmail.com>
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
[ROCm/composable_kernel commit: 3049b5467c]
This commit is contained in:
@@ -100,16 +100,18 @@ list(APPEND GEMM_INSTANCES
|
||||
device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp)
|
||||
|
||||
list(APPEND GEMM_INSTANCES
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp)
|
||||
if(GPU_TARGETS MATCHES "gfx94")
|
||||
list(APPEND GEMM_INSTANCES
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp
|
||||
device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp)
|
||||
endif()
|
||||
|
||||
list(APPEND GEMM_INSTANCES
|
||||
device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp
|
||||
|
||||
Reference in New Issue
Block a user