From d3542ff0e06ee47aee7b2df0fe70e3053c159a5d Mon Sep 17 00:00:00 2001 From: Madan mohan Manokar Date: Mon, 5 Jul 2021 18:40:34 +0530 Subject: [PATCH] 3m_sqp conjugate support added 1. 3m_sqp support for A matrix with conjugate_no_transpose and conjugate_transpose added. AMD-Internal: [CPUPL-1521] Change-Id: Ie6e5c49cf86f7d3b95d78705cf445e57f20b3d1f --- frame/compat/bla_gemm.c | 3 +- kernels/zen/3/bli_gemm_sqp.c | 46 +++++-- kernels/zen/3/bli_gemm_sqp_kernels.c | 179 ++++++++++++++++++++++++--- kernels/zen/3/bli_gemm_sqp_kernels.h | 2 +- 4 files changed, 198 insertions(+), 32 deletions(-) diff --git a/frame/compat/bla_gemm.c b/frame/compat/bla_gemm.c index 557b3f202..044cdf9bb 100644 --- a/frame/compat/bla_gemm.c +++ b/frame/compat/bla_gemm.c @@ -666,8 +666,7 @@ void zgemm_ sqp_on = true; } #endif - if( ( ( blis_transa == BLIS_TRANSPOSE ) || ( blis_transa == BLIS_NO_TRANSPOSE ) ) - && ( blis_transb == BLIS_NO_TRANSPOSE) && (sqp_on == true)) + if( ( blis_transb == BLIS_NO_TRANSPOSE) && ( sqp_on == true ) ) { //sqp algo is found better for n > 40 if(bli_gemm_sqp(&alphao, &ao, &bo, &betao, &co, NULL, NULL)==BLIS_SUCCESS) diff --git a/kernels/zen/3/bli_gemm_sqp.c b/kernels/zen/3/bli_gemm_sqp.c index 1622c551c..ceab622bf 100644 --- a/kernels/zen/3/bli_gemm_sqp.c +++ b/kernels/zen/3/bli_gemm_sqp.c @@ -40,16 +40,31 @@ #define BLIS_LOADFIRST 0 #define MEM_ALLOC 1//malloc performs better than bli_malloc. 
+/* SET_TRANS(X,Y): stores a transpose/conjugate selector in Y. Y starts as BLIS_NO_TRANSPOSE; if bli_obj_has_trans(a) holds it becomes BLIS_TRANSPOSE, or BLIS_CONJ_TRANSPOSE when bli_obj_has_conj(a) also holds; otherwise, if only bli_obj_has_conj(a) holds, it becomes BLIS_CONJ_NO_TRANSPOSE. NOTE(review): parameter X is never referenced - the expansion always queries the identifier `a` visible at the expansion site, so SET_TRANS(b, y) would still inspect `a`; confirm whether `a` was meant to be `X`. NOTE(review): the expansion is a bare statement sequence (an assignment followed by an if/else), not wrapped in do-while(0), so it must not be used as the body of an unbraced if/else. */#define SET_TRANS(X,Y)\ + Y = BLIS_NO_TRANSPOSE;\ + if(bli_obj_has_trans( a ))\ + {\ + Y = BLIS_TRANSPOSE;\ + if(bli_obj_has_conj(a))\ + {\ + Y = BLIS_CONJ_TRANSPOSE;\ + }\ + }\ + else if(bli_obj_has_conj(a))\ + {\ + Y = BLIS_CONJ_NO_TRANSPOSE;\ + } + //Macro for 3m_sqp n loop #define BLI_SQP_ZGEMM_N(MX)\ int j=0;\ for(; j<=(n-nx); j+= nx)\ {\ - status = bli_sqp_zgemm_m8( m, nx, k, a, lda, b+(j*ldb), ldb, c+(j*ldc), ldc, alpha_real, beta_real, isTransA, MX, p_istart, kx, &mem_3m_sqp);\ + status = bli_sqp_zgemm_m8( m, nx, k, a, lda, b+(j*ldb), ldb, c+(j*ldc), ldc, alpha_real, beta_real, transa, MX, p_istart, kx, &mem_3m_sqp);\ }\ if(j