Added new API in aocl_gemm to support A bf16 data type and B s4 data type

Description: 1. Added a new API, aocl_gemm_bf16s4f32of32, to support WoQ (Weight-only Quantization) in LLMs. 2. The API supports only a reordered B matrix with signed 4-bit (S4) elements. 3. Subtracting the zero point and multiplying by the scale on the B matrix are performed while packing B. 4. The zero-point and scale data should be passed by the user through the pre-ops data structure. 5. The API is still in an experimental state and is NOT tested. AMD-Internal: SWLCSG-2943 Change-Id: I10b159b64c2e2aaf39da5462685618ba8cc800ee
2026-05-05 15:01:13 +00:00 · 2024-07-22 09:22:35 +00:00
parent 49949f488f
commit c6dd7c1b4b
17 changed files with 1460 additions and 33 deletions
--- a/addon/aocl_gemm/config/lpgemm_func_map.h
+++ b/addon/aocl_gemm/config/lpgemm_func_map.h
@@ -67,6 +67,7 @@
 	PBMACRO(S8S8S32OS32, packb_nr64_s8s8s32os32) \
 	PBMACRO(S8S8S16OS16, packb_nr32_s8s8s16o16) \
 	PBMACRO(U8S4S32OS32, packb_nr64_u8s4s32o32) \
+	PBMACRO(BF16S4F32OF32, packb_nr64_bf16s4f32of32) \

 #define LPGEMM_UTIL_KERN_FUNC_MAP_AVX512_VNNI_BF16 \
 	UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx512_kernel) \
@@ -121,10 +122,11 @@
 #define LPGEMM_PACKB_FUNC_MAP_AVX512 \
 	PBMACRO(U8S8S16OS16, packb_nr32_u8s8s16o16) \
 	PBMACRO(U8S8S32OS32, packb_nr64_u8s8s32o32) \
-	PBMACRO(BF16BF16F32OF32, packb_nr64_bf16bf16f32of32) \
+	PBMACRO(BF16BF16F32OF32, NULL) \
 	PBMACRO(S8S8S32OS32, packb_nr64_s8s8s32os32) \
 	PBMACRO(S8S8S16OS16, packb_nr32_s8s8s16o16) \
 	PBMACRO(U8S4S32OS32, packb_nr64_u8s4s32o32) \
+	PBMACRO(BF16S4F32OF32, NULL) \

 #define LPGEMM_UTIL_KERN_FUNC_MAP_AVX512 \
 	UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx512_kernel) \