mirror of
https://github.com/amd/blis.git
synced 2026-04-20 15:48:50 +00:00
Disabled the no-post-ops path in lpgemm f32 kernels for a few GCC versions
Guarded the np (no post-ops) path in the f32 API with a macro as a workaround, because GCC 11.4 and 11.2 produce accuracy issues with the np path.
This commit is contained in:
@@ -44,8 +44,13 @@
|
||||
|
||||
LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_6x16m)
|
||||
{
|
||||
#ifdef BLIS_GCC_12_ABOVE
|
||||
|
||||
if(post_ops_list->op_code == POST_OPS_DISABLE)
|
||||
{
|
||||
//When there are no post-ops to do after GEMM this np path gives
|
||||
//better performance as the code size improves significantly.
|
||||
// This path has accuracy issues with GCC versions below 12, so it is disabled for those compilers.
|
||||
lpgemm_rowvar_f32f32f32of32_6x16m_np
|
||||
(
|
||||
m0, n0, k0,
|
||||
@@ -58,6 +63,9 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_6x16m)
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
//Call RD kernels if B is transposed
|
||||
if(rs_b == 1 && n0 != 1)
|
||||
{
|
||||
|
||||
@@ -44,8 +44,14 @@
|
||||
|
||||
LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
|
||||
{
|
||||
|
||||
#ifdef BLIS_GCC_12_ABOVE
|
||||
|
||||
if(post_ops_list->op_code == POST_OPS_DISABLE)
|
||||
{
|
||||
//When there are no post-ops to do after GEMM this np path gives
|
||||
//better performance as the code size improves significantly
|
||||
// This path has accuracy issues with GCC versions below 12, so it is disabled for those compilers.
|
||||
lpgemm_rowvar_f32f32f32of32_avx512_6x64m_np
|
||||
(
|
||||
m0, n0, k0,
|
||||
@@ -57,6 +63,7 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
|
||||
);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
//Call RD kernels if B is transposed
|
||||
if(rs_b == 1 && n0 != 1 )
|
||||
|
||||
Reference in New Issue
Block a user