Disabled the no-post-ops (np) path in lpgemm f32 kernels for some GCC versions

Guarded the np (no post-ops) path in the f32 API with a macro
 as a workaround, because GCC 11.2 and 11.4 produce accuracy issues
 with the np path.
This commit is contained in:
Bhaskar, Nallani
2025-09-22 15:52:21 +05:30
committed by GitHub
parent 807de2a990
commit db3134ed6d
3 changed files with 20 additions and 0 deletions

View File

@@ -47,6 +47,11 @@
//#define DUMP_JIT_CODE
#endif
// BLIS_GCC_12_ABOVE is defined only when BLIS_GCC is defined and the
// compiler reports __GNUC__ >= 12. Code wrapped in
// `#ifdef BLIS_GCC_12_ABOVE` is therefore compiled out for older GCC
// releases.
// NOTE(review): builds where BLIS_GCC is not defined also leave this macro
// undefined, so the guarded code is compiled out there as well - confirm
// that this is intended.
#if ( defined( BLIS_GCC ) && ( ( __GNUC__ >= 12 ) ) )
#define BLIS_GCC_12_ABOVE
#endif
typedef void (*lpgemm_m_fringe_f32_ker_ft)
(
const dim_t k0,

View File

@@ -44,8 +44,13 @@
LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_6x16m)
{
#ifdef BLIS_GCC_12_ABOVE
if(post_ops_list->op_code == POST_OPS_DISABLE)
{
//When there are no post-ops to apply after GEMM, this np path gives
//better performance because the code size improves significantly.
//This path has accuracy issues with GCC versions below 12, so it is disabled for them.
lpgemm_rowvar_f32f32f32of32_6x16m_np
(
m0, n0, k0,
@@ -58,6 +63,9 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_6x16m)
return;
}
#endif
//Call RD kernels if B is transposed
if(rs_b == 1 && n0 != 1)
{

View File

@@ -44,8 +44,14 @@
LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
{
#ifdef BLIS_GCC_12_ABOVE
if(post_ops_list->op_code == POST_OPS_DISABLE)
{
//When there are no post-ops to apply after GEMM, this np path gives
//better performance because the code size improves significantly.
//This path has accuracy issues with GCC versions below 12, so it is disabled for them.
lpgemm_rowvar_f32f32f32of32_avx512_6x64m_np
(
m0, n0, k0,
@@ -57,6 +63,7 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
);
return;
}
#endif
//Call RD kernels if B is transposed
if(rs_b == 1 && n0 != 1 )