From 09e34fd2bd338832148ae2e92094926b717c49ef Mon Sep 17 00:00:00 2001 From: orequest Date: Wed, 30 Aug 2023 09:13:00 +0530 Subject: [PATCH] Added optimized CGEMM function pointers in zen4 cntx 1. Two CGEMM function pointers are added for different storage schemes 1. bli_cgemmsup_rv_zen_asm_3x8m 2. bli_cgemmsup_rv_zen_asm_3x8n 2. In previous commit: (Level-3 triangular routines now use different block sizes and kernels Commit Id: 79e174ff0aaa86ea9cdb2e74b7f6ca71465e6191) 1. bli_cntx_set_l3_sup_tri_kers cntx function was created 2. Function holds optimized function pointers for GEMMT/SYRK APIs 3. It avoids overriding default block sizes which improves the performance 4. This function did not include optimized CGEMM function pointers leading to regression as reference kernels were invoked 3. With this commit, 2 optimized CGEMM function pointers are added in bli_cntx_set_l3_sup_tri_kers 1. This fixes the regression as optimized CGEMM functions are invoked AMD-Internal: [CPUPL-3831] [CPUPL-3830] Change-Id: Ie8b41a5e62439de2a65e7df0b07d63ee2383e51e --- config/zen4/bli_cntx_init_zen4.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/config/zen4/bli_cntx_init_zen4.c b/config/zen4/bli_cntx_init_zen4.c index f5b7be7de..8ef336d43 100644 --- a/config/zen4/bli_cntx_init_zen4.c +++ b/config/zen4/bli_cntx_init_zen4.c @@ -365,7 +365,7 @@ void bli_cntx_init_zen4( cntx_t* cntx ) bli_cntx_set_l3_sup_tri_kers ( - 24, + 30, BLIS_RRR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, TRUE, BLIS_RRC, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16m, TRUE, BLIS_RCR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, TRUE, @@ -382,6 +382,12 @@ void bli_cntx_init_zen4( cntx_t* cntx ) BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE, BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, + BLIS_RRR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE, + BLIS_RCR, BLIS_SCOMPLEX, 
bli_cgemmsup_rv_zen_asm_3x8m, TRUE, + BLIS_CRR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE, + BLIS_RCC, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE, + BLIS_CCR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE, + BLIS_CCC, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE, BLIS_RRR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, TRUE, BLIS_RRC, BLIS_DCOMPLEX, bli_zgemmsup_rd_zen_asm_3x4m, TRUE, BLIS_RCR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, TRUE, @@ -392,4 +398,4 @@ void bli_cntx_init_zen4( cntx_t* cntx ) BLIS_CCC, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, TRUE, cntx ); -} \ No newline at end of file +}