mirror of
https://github.com/amd/blis.git
synced 2026-05-04 06:21:12 +00:00
Added new LPGEMM type - S8S8S32/S8
1. A new LPGEMM type, S8S8S32/S8, is added. 2. New interface, frame and kernel files are added. 3. The frame and kernel files added/modified for S8S8S32/S8 implement two operations: Pack B and Mat Mul. 4. The Pack B kernel routines pack the B matrix for VNNI and compute the sum of every column of the B matrix, in order to implement the S8S8S32 operation using the VNNI instructions. 5. The Mat Mul kernel files compute the GEMM output using VNNI. Here the A matrix elements are converted from int8 to uint8 (VNNI works with an A matrix of type uint8 only). 6. After the GEMM computation, additional operations are performed on the accumulated outputs to get the correct results. 7. With this change, two new APIs are introduced in LPGEMM: s8s8s32os32 and s8s8s32os8. 8. All previously added post-ops are also supported on S8S8S32/S8. AMD-Internal: [CPUPL-3154] Change-Id: Ib18f82bde557ea4a815a63adc7870c4234bfb9d3
This commit is contained in:
@@ -50,16 +50,19 @@
|
||||
KMACRO(U8S8S32OS32, lpgemm_rowvar_u8s8s32o32_6x64) \
|
||||
KMACRO(F32F32F32OF32, lpgemm_rowvar_f32f32f32of32_avx512_6x64m) \
|
||||
KMACRO(BF16BF16F32OF32, lpgemm_rowvar_bf16bf16f32of32_6x64) \
|
||||
KMACRO(S8S8S32OS32, lpgemm_rowvar_s8s8s32os32_6x64) \
|
||||
|
||||
// Pack-A function map for the AVX512_VNNI_BF16 configuration: one
// PAMACRO(<operation type>, <routine>) entry per LPGEMM operation type.
// PAMACRO is not defined in this view. The U8S8S16OS16 and BF16BF16F32OF32
// entries pass NULL instead of a routine name.
#define LPGEMM_PACKA_FUNC_MAP_AVX512_VNNI_BF16 \
|
||||
PAMACRO(U8S8S16OS16, NULL) \
|
||||
PAMACRO(U8S8S32OS32, packa_k64_u8s8s32o32) \
|
||||
PAMACRO(BF16BF16F32OF32, NULL) \
|
||||
PAMACRO(S8S8S32OS32, packa_k64_s8s8s32os32) \
|
||||
|
||||
// Pack-B function map for the AVX512_VNNI_BF16 configuration: one
// PBMACRO(<operation type>, <routine>) entry per LPGEMM operation type.
// PBMACRO is not defined in this view. Every operation type listed here
// names a packb_* routine (none is NULL).
#define LPGEMM_PACKB_FUNC_MAP_AVX512_VNNI_BF16 \
|
||||
PBMACRO(U8S8S16OS16, packb_nr32_u8s8s16o16) \
|
||||
PBMACRO(U8S8S32OS32, packb_nr64_u8s8s32o32) \
|
||||
PBMACRO(BF16BF16F32OF32, packb_nr64_bf16bf16f32of32) \
|
||||
PBMACRO(S8S8S32OS32, packb_nr64_s8s8s32os32) \
|
||||
|
||||
// Icelake
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX512_VNNI \
|
||||
@@ -67,16 +70,19 @@
|
||||
KMACRO(U8S8S32OS32, lpgemm_rowvar_u8s8s32o32_6x64) \
|
||||
KMACRO(F32F32F32OF32, lpgemm_rowvar_f32f32f32of32_avx512_6x64m) \
|
||||
KMACRO(BF16BF16F32OF32, lpgemm_rowvar_bf16bf16f32of32_6x64) \
|
||||
KMACRO(S8S8S32OS32, lpgemm_rowvar_s8s8s32os32_6x64) \
|
||||
|
||||
// Pack-A function map for the AVX512_VNNI configuration: one
// PAMACRO(<operation type>, <routine>) entry per LPGEMM operation type.
// PAMACRO is not defined in this view. The U8S8S16OS16 and BF16BF16F32OF32
// entries pass NULL instead of a routine name.
#define LPGEMM_PACKA_FUNC_MAP_AVX512_VNNI \
|
||||
PAMACRO(U8S8S16OS16, NULL) \
|
||||
PAMACRO(U8S8S32OS32, packa_k64_u8s8s32o32) \
|
||||
PAMACRO(BF16BF16F32OF32, NULL) \
|
||||
PAMACRO(S8S8S32OS32, packa_k64_s8s8s32os32) \
|
||||
|
||||
// Pack-B function map for the AVX512_VNNI configuration: one
// PBMACRO(<operation type>, <routine>) entry per LPGEMM operation type.
// PBMACRO is not defined in this view. Every operation type listed here
// names a packb_* routine (none is NULL).
#define LPGEMM_PACKB_FUNC_MAP_AVX512_VNNI \
|
||||
PBMACRO(U8S8S16OS16, packb_nr32_u8s8s16o16) \
|
||||
PBMACRO(U8S8S32OS32, packb_nr64_u8s8s32o32) \
|
||||
PBMACRO(BF16BF16F32OF32, packb_nr64_bf16bf16f32of32) \
|
||||
PBMACRO(S8S8S32OS32, packb_nr64_s8s8s32os32) \
|
||||
|
||||
// Skylake
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX512 \
|
||||
@@ -84,16 +90,19 @@
|
||||
KMACRO(U8S8S32OS32, lpgemm_rowvar_u8s8s32o32_6x64) \
|
||||
KMACRO(F32F32F32OF32, lpgemm_rowvar_f32f32f32of32_avx512_6x64m) \
|
||||
KMACRO(BF16BF16F32OF32, lpgemm_rowvar_bf16bf16f32of32_6x64) \
|
||||
KMACRO(S8S8S32OS32, lpgemm_rowvar_s8s8s32os32_6x64) \
|
||||
|
||||
// Pack-A function map for the AVX512 configuration: one
// PAMACRO(<operation type>, <routine>) entry per LPGEMM operation type.
// PAMACRO is not defined in this view. The U8S8S16OS16 and BF16BF16F32OF32
// entries pass NULL instead of a routine name.
#define LPGEMM_PACKA_FUNC_MAP_AVX512 \
|
||||
PAMACRO(U8S8S16OS16, NULL) \
|
||||
PAMACRO(U8S8S32OS32, packa_k64_u8s8s32o32) \
|
||||
PAMACRO(BF16BF16F32OF32, NULL) \
|
||||
PAMACRO(S8S8S32OS32, packa_k64_s8s8s32os32) \
|
||||
|
||||
// Pack-B function map for the AVX512 configuration: one
// PBMACRO(<operation type>, <routine>) entry per LPGEMM operation type.
// PBMACRO is not defined in this view. Every operation type listed here
// names a packb_* routine (none is NULL).
#define LPGEMM_PACKB_FUNC_MAP_AVX512 \
|
||||
PBMACRO(U8S8S16OS16, packb_nr32_u8s8s16o16) \
|
||||
PBMACRO(U8S8S32OS32, packb_nr64_u8s8s32o32) \
|
||||
PBMACRO(BF16BF16F32OF32, packb_nr64_bf16bf16f32of32) \
|
||||
PBMACRO(S8S8S32OS32, packb_nr64_s8s8s32os32) \
|
||||
|
||||
// Milan, Haswell
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX2 \
|
||||
@@ -101,15 +110,18 @@
|
||||
KMACRO(U8S8S32OS32, NULL) \
|
||||
KMACRO(F32F32F32OF32, lpgemm_rowvar_f32f32f32of32_6x16m) \
|
||||
KMACRO(BF16BF16F32OF32, NULL) \
|
||||
KMACRO(S8S8S32OS32, NULL) \
|
||||
|
||||
// Pack-A function map for the AVX2 configuration: one
// PAMACRO(<operation type>, <routine>) entry per LPGEMM operation type.
// PAMACRO is not defined in this view. Every entry in this map passes NULL,
// i.e. no pack-A routine is named for any operation type here.
#define LPGEMM_PACKA_FUNC_MAP_AVX2 \
|
||||
PAMACRO(U8S8S16OS16, NULL) \
|
||||
PAMACRO(U8S8S32OS32, NULL) \
|
||||
PAMACRO(BF16BF16F32OF32, NULL) \
|
||||
PAMACRO(S8S8S32OS32, NULL) \
|
||||
|
||||
// Pack-B function map for the AVX2 configuration: one
// PBMACRO(<operation type>, <routine>) entry per LPGEMM operation type.
// PBMACRO is not defined in this view. Only U8S8S16OS16 names a routine
// (packb_nr32_u8s8s16o16); the other three entries pass NULL.
#define LPGEMM_PACKB_FUNC_MAP_AVX2 \
|
||||
PBMACRO(U8S8S16OS16, packb_nr32_u8s8s16o16) \
|
||||
PBMACRO(U8S8S32OS32, NULL) \
|
||||
PBMACRO(BF16BF16F32OF32, NULL) \
|
||||
PBMACRO(S8S8S32OS32, NULL) \
|
||||
|
||||
#endif //LPGEMM_FUNC_MAP_H
|
||||
|
||||
Reference in New Issue
Block a user