mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-05 14:11:29 +00:00
* add DeviceGemmSplitKXdl * add file device_gemm_splitk_xdl.hpp * set c matrix zero * using atomic * add all tuning parameter to f32 mkkn * grid size change to 720 * add tuning parameter for NT * add tuning parameter for TN * add tuning parameter for TT * add m=96 tuning parameter * add lost config * debug * fix sweep * add failed tuning params * fixed sweep logic * clean * add padding to M/N for irregular tile size * clean code * add element wise operation * fixed MPerBlock=96 * remove macro for splitk switch * add test * add new line at the end of device_gemm_xdl_instance.hpp * remove step hack * separate split-k instance files * add tuning parameters * change desired grid size to parameters * remove slice length * add desiredgridsize parameter to ckProfiler * add missing file device_gemm_xdl_splitk_instance.hpp * change desired grid size to kbatch * format * format * clean up * add selection of device_instances * clean code * clean code * add small tile size in fp16 nn * test for rocm 4.5 * merge develop * clean * clean * clean * remove no-use code * add padding switch to device_gemm_xdl * add padding switch for ksplit fp32 * clean * clean * add files * rename * Update profiler.cpp * format Co-authored-by: ltqin <letaoqin@amd.com> Co-authored-by: ltqin <letao.qin@amd.com> Co-authored-by: Chao Liu <chao.liu2@amd.com>
18 lines
260 B
C++
18 lines
260 B
C++
#ifndef GEMM_SPECIALIZATION
#define GEMM_SPECIALIZATION

namespace ck {
namespace tensor_operation {
namespace device {

// Tag identifying which specialization of a device GEMM is requested.
//
// Deliberately left as an unscoped enum: the enumerators stay visible directly
// in ck::tensor_operation::device and remain implicitly convertible to an
// integer, so any code that spells them unqualified or uses them as integral
// values keeps compiling. Do not reorder the enumerators; their numeric values
// are assigned by position.
enum GemmSpecialization_t
{
    // First enumerator (value 0): the unspecialized variant.
    Default,
    // Second enumerator (value 1).
    // NOTE(review): presumably selects padding of the GEMM M and N dimensions
    // for problem sizes that do not divide evenly into tiles -- confirm against
    // the device GEMM implementations that consume this tag.
    MNPadding,
};

} // namespace device
} // namespace tensor_operation
} // namespace ck
#endif