Added a new API in aocl_gemm to support a bf16 A matrix with an s4 B matrix

Description: 1. Added a new API, aocl_gemm_bf16s4f32of32, to support WoQ (Weight-only Quantization) in LLMs. 2. The API supports only a reordered B matrix whose elements are signed 4-bit (S4) values. 3. Subtracting the zero point from the B matrix and multiplying it by the scale are performed while packing B. 4. The zero-point and scale data must be passed by the user through the pre-ops data structure. 5. The API is still in an experimental state and is NOT tested. AMD-Internal: SWLCSG-2943 Change-Id: I10b159b64c2e2aaf39da5462685618ba8cc800ee
2026-05-05 06:51:11 +00:00 · 2024-07-22 09:22:35 +00:00
parent 49949f488f
commit c6dd7c1b4b
17 changed files with 1460 additions and 33 deletions
--- a/addon/aocl_gemm/frame/lpgemm_5loop_interface_apis.h
+++ b/addon/aocl_gemm/frame/lpgemm_5loop_interface_apis.h
@@ -72,6 +72,35 @@ LPGEMM_5LOOP(bfloat16,bfloat16,float,bf16bf16f32of32);
 LPGEMM_5LOOP(int8_t,int8_t,int32_t,s8s8s32o32);
 LPGEMM_5LOOP(int8_t,int8_t,int16_t,s8s8s16o16);

+#define LPGEMM_5LOOP1(A_type,B_type,C_type,LP_SFX) /* Expands to a prototype for void lpgemm_rowvar_<LP_SFX>(...); no ';' is emitted, the use site supplies it. */ \
+void lpgemm_rowvar_ ## LP_SFX /* ## pastes LP_SFX onto the lpgemm_rowvar_ prefix to form the function name */ \
+     ( \
+       const dim_t           m, \
+       const dim_t           n, \
+       const dim_t           k, \
+       const A_type*         a, /* A operand, not written through this pointer */ \
+       const dim_t           rs_a, \
+       const dim_t           cs_a, \
+       const AOCL_MEMORY_TAG mtag_a, \
+       const B_type*         b, /* B operand, not written through this pointer */ \
+       const dim_t           rs_b, \
+       const dim_t           cs_b, \
+       const AOCL_MEMORY_TAG mtag_b, \
+       C_type*               c, /* C operand, the only non-const matrix pointer */ \
+       const dim_t           rs_c, \
+       const dim_t           cs_c, \
+       const C_type          alpha, /* alpha and beta share C's element type */ \
+       const C_type          beta, \
+       rntm_t*               rntm, \
+       lpgemm_thrinfo_t*     thread, \
+       lpgemm_cntx_t*        lcntx, \
+       lpgemm_pre_op*        pre_op_list, /* NOTE(review): presumably carries the zero-point/scale data for B mentioned in the commit description - confirm */ \
+       lpgemm_post_op*       post_op_list, \
+       AOCL_STORAGE_TYPE     c_downscale \
+     ) /* NOTE: the trailing backslash splices the following blank line into the macro body; keep that line blank */ \
+
+LPGEMM_5LOOP1(bfloat16,int8_t,float,bf16s4f32of32); /* Declares lpgemm_rowvar_bf16s4f32of32 with A=bfloat16, B=int8_t, C=float. NOTE(review): B is typed int8_t although the suffix says s4 - presumably packed 4-bit values; confirm. */
+
 #define LPGEMV(A_type, B_type, C_type, LP_SFX) \
 void lpgemv_rowvar_ ## LP_SFX \
    ( \