GEMV support for S8S8S32OS32 Symmetric Quantization

Introduced support for GEMV operations with group-level symmetric quantization for the S8S8S32OS32 API. Framework Changes: - Added macro definitions and function prototypes for GEMV with symmetric quantization in lpgemm_5loop_interface_apis.h and lpgemm_kernels.h: - LPGEMV_M_EQ1_KERN2 for the lpgemv_m_one_s8s8s32os32_sym_quant kernel, and - LPGEMV_N_EQ1_KERN2 for the lpgemv_n_one_s8s8s32os32_sym_quant kernel. - Implemented the main GEMV framework for symmetric quantization in lpgemm_s8s8s32_sym_quant.c. Kernel Changes: - Added lpgemv_m_one_s8s8s32os32_sym_quant, which handles the case M = 1 and is implemented in lpgemv_m_kernel_s8_grp_amd512vnni.c. - Added lpgemv_n_one_s8s8s32os32_sym_quant, which handles the case N = 1 and is implemented in lpgemv_n_kernel_s8_grp_amd512vnni.c. - Updated the buffer reordering logic for group quantization for N=1 cases in aocl_gemm_s8s8s32os32_utils.c. Notes: - Ensure that group_size is a factor of K (and also of KC when K > KC). - The B matrix must be provided in reordered format (mtag_b == REORDERED). AMD-Internal: [SWLCSG-3604]
2026-05-04 22:41:11 +00:00 · 2025-08-14 13:41:25 +05:30
parent 3a14417ce1
commit 76c4872718
6 changed files with 3280 additions and 142 deletions
--- a/addon/aocl_gemm/kernels/lpgemm_kernels.h
+++ b/addon/aocl_gemm/kernels/lpgemm_kernels.h
@@ -889,6 +889,36 @@ LPGEMV_M_EQ1_KERN(bfloat16,bfloat16,float,bf16bf16f32of32);
 LPGEMV_M_EQ1_KERN(uint8_t,int8_t,int32_t,u8s8s32os32);
 LPGEMV_M_EQ1_KERN(int8_t,int8_t,int32_t,s8s8s32os32);

+/* Prototype generator: expands to the declaration of lpgemv_m_one_<LP_SFX> (token-pasted name). C_type types only alpha/beta; the output pointer c is float* for every instantiation. */
+#define LPGEMV_M_EQ1_KERN2(A_type,B_type,C_type,LP_SFX) \
+void lpgemv_m_one_ ## LP_SFX \
+( \
+	const dim_t           n0, \
+	const dim_t           k, \
+	const A_type          *a, \
+	const dim_t           rs_a, \
+	const dim_t           cs_a, \
+	const AOCL_MEMORY_TAG mtag_a, \
+	const B_type          *b, \
+	dim_t                 rs_b, /* not const here, unlike cs_b */ \
+	const dim_t           cs_b, \
+	const AOCL_MEMORY_TAG mtag_b, \
+	float                 *c, /* float output, independent of C_type */ \
+	const dim_t           rs_c, \
+	const dim_t           cs_c, \
+	const C_type          alpha, \
+	const C_type          beta, \
+	dim_t                 NR, /* not const in this prototype */ \
+	const dim_t           KC, \
+	const dim_t           n_sub_updated, \
+	const dim_t           jc_cur_loop_rem, \
+    lpgemm_grp_post_op_attr  grp_post_ops_attr, /* passed by value */ \
+	lpgemm_post_op        *post_op, \
+	lpgemm_post_op_attr   *post_op_attr \
+  ) \
+
+LPGEMV_M_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant); /* declares lpgemv_m_one_s8s8s32os32_sym_quant */
+
 #define LPGEMV_N_EQ1_KERN(A_type,B_type,C_type,LP_SFX) \
 void lpgemv_n_one_ ## LP_SFX \
 ( \
@@ -920,4 +950,32 @@ LPGEMV_N_EQ1_KERN(bfloat16, bfloat16, float,bf16bf16f32of32);
 LPGEMV_N_EQ1_KERN(uint8_t,int8_t,int32_t,u8s8s32os32);
 LPGEMV_N_EQ1_KERN(int8_t,int8_t,int32_t,s8s8s32os32);

+/* Prototype generator: expands to the declaration of lpgemv_n_one_<LP_SFX> (token-pasted name). C_type types only alpha/beta; the output pointer c is float* for every instantiation. */
+#define LPGEMV_N_EQ1_KERN2(A_type,B_type,C_type,LP_SFX) \
+void lpgemv_n_one_ ## LP_SFX \
+( \
+	const dim_t           m0, \
+	const dim_t           k, \
+	const A_type          *a, \
+	const dim_t           rs_a, \
+	const dim_t           cs_a, \
+	const AOCL_MEMORY_TAG mtag_a, \
+	const B_type          *b, \
+	const dim_t           rs_b, \
+	const dim_t           cs_b, \
+	const AOCL_MEMORY_TAG mtag_b, \
+	float                 *c, /* float output, independent of C_type */ \
+	const dim_t           rs_c, \
+	const dim_t           cs_c, \
+	const C_type          alpha, \
+	const C_type          beta, \
+	const dim_t           MR, \
+	const dim_t           KC, \
+    lpgemm_grp_post_op_attr  grp_post_ops_attr, /* passed by value */ \
+	lpgemm_post_op        *post_op, \
+	lpgemm_post_op_attr   *post_op_attr \
+) \
+
+LPGEMV_N_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant); /* declares lpgemv_n_one_s8s8s32os32_sym_quant */
+
 #endif //BLIS_LPGEMM_KERN_H