mirror of
https://github.com/amd/blis.git
synced 2026-05-05 06:51:11 +00:00
Added new API in aocl_gemm to support A bf16 data type and B s4 data type
Description: 1. Added a new API, aocl_gemm_bf16s4f32of32, to support WoQ (Weight-only Quantization) in LLMs. 2. The API supports only a reordered B matrix whose elements are signed 4-bit (S4) values. 3. Subtracting the zero point and multiplying by the scale on the B matrix is performed while packing B. 4. Zero-point and scale data should be passed by the user through the pre-ops data structure. 5. The API is still in an experimental state and is NOT tested. AMD-Internal: SWLCSG-2943 Change-Id: I10b159b64c2e2aaf39da5462685618ba8cc800ee
This commit is contained in:
@@ -72,6 +72,35 @@ LPGEMM_5LOOP(bfloat16,bfloat16,float,bf16bf16f32of32);
|
||||
LPGEMM_5LOOP(int8_t,int8_t,int32_t,s8s8s32o32);
|
||||
LPGEMM_5LOOP(int8_t,int8_t,int16_t,s8s8s16o16);
|
||||
|
||||
/*
 * LPGEMM_5LOOP1(A_type, B_type, C_type, LP_SFX)
 *
 * Expands to the signature of `void lpgemm_rowvar_<LP_SFX>( ... )`, where
 * <LP_SFX> is pasted onto the function name with `##`.
 *
 *   A_type  - element type of the read-only `a` operand pointer.
 *   B_type  - element type of the read-only `b` operand pointer.
 *   C_type  - element type of the writable `c` pointer and of the
 *             by-value `alpha` and `beta` arguments.
 *   LP_SFX  - suffix appended to `lpgemm_rowvar_` to form the name.
 *
 * The expansion stops at the closing parenthesis of the parameter list: it
 * contains no function body and no trailing semicolon, so the use site must
 * supply the `;` to turn it into a declaration.
 *
 * Besides the operand pointers, the declared function takes the dimensions
 * m, n, k, an rs_/cs_ pair and an AOCL_MEMORY_TAG for a and b, an rs_/cs_
 * pair for c, a rntm_t*, an lpgemm_thrinfo_t*, an lpgemm_cntx_t*, a
 * lpgemm_pre_op* list, a lpgemm_post_op* list and an AOCL_STORAGE_TYPE
 * named c_downscale.
 *
 * NOTE(review): rs_x / cs_x are presumably row and column strides - confirm
 * against the implementation.
 * NOTE(review): this looks like a variant of LPGEMM_5LOOP that additionally
 * carries `pre_op_list`; the LPGEMM_5LOOP definition is not visible here, so
 * verify before relying on that.
 */
#define LPGEMM_5LOOP1(A_type,B_type,C_type,LP_SFX) \
|
||||
void lpgemm_rowvar_ ## LP_SFX \
|
||||
( \
|
||||
const dim_t m, \
|
||||
const dim_t n, \
|
||||
const dim_t k, \
|
||||
const A_type* a, \
|
||||
const dim_t rs_a, \
|
||||
const dim_t cs_a, \
|
||||
const AOCL_MEMORY_TAG mtag_a, \
|
||||
const B_type* b, \
|
||||
const dim_t rs_b, \
|
||||
const dim_t cs_b, \
|
||||
const AOCL_MEMORY_TAG mtag_b, \
|
||||
C_type* c, \
|
||||
const dim_t rs_c, \
|
||||
const dim_t cs_c, \
|
||||
const C_type alpha, \
|
||||
const C_type beta, \
|
||||
rntm_t* rntm, \
|
||||
lpgemm_thrinfo_t* thread, \
|
||||
lpgemm_cntx_t* lcntx, \
|
||||
lpgemm_pre_op* pre_op_list, \
|
||||
lpgemm_post_op* post_op_list, \
|
||||
AOCL_STORAGE_TYPE c_downscale \
|
||||
) \
|
||||
|
||||
/*
 * Declares lpgemm_rowvar_bf16s4f32of32: `a` is a const bfloat16 pointer,
 * `b` is a const int8_t pointer, and `c`, `alpha` and `beta` use float.
 * NOTE(review): the "s4" in the suffix suggests B holds signed 4-bit data
 * carried through an int8_t pointer - confirm the packing with the caller.
 */
LPGEMM_5LOOP1(bfloat16,int8_t,float,bf16s4f32of32);
|
||||
|
||||
#define LPGEMV(A_type, B_type, C_type, LP_SFX) \
|
||||
void lpgemv_rowvar_ ## LP_SFX \
|
||||
( \
|
||||
|
||||
Reference in New Issue
Block a user