mirror of
https://github.com/amd/blis.git
synced 2026-05-13 02:25:39 +00:00
Workaround for perf regression observed for sgemm
Details:
- A perf regression is observed in BLIS 3.1 for certain m,n,k inputs where
(m,n,k > 512) and (m > 4 * n). The root cause was traced to commit
11dfc176a3, where BLIS_THREAD_RATIO_M was
updated from 2 to 1. That change was not part of BLIS 3.0.6, which is why
the perf drop first appears in 3.1.
- This workaround doubles the m dimension that is passed as an argument to
bli_rntm_set_ways_for_op, which is used to determine the ic,jc work split
among the threads. BLIS_THREAD_RATIO_M itself is not changed back to 2;
instead, the same effect is induced by passing the doubled m dimension.
AMD-Internal: [CPUPL-1909]
Change-Id: I3b6ec4d4a22154289cb56d8f7db4cb60e5f34afe
This commit is contained in:
committed by
Dipal M Zambare
parent
30038af896
commit
d683c224e8
@@ -173,7 +173,24 @@ void bli_gemm_front
|
||||
// or the inlined code above.
|
||||
bli_obj_swap_pack_schemas( &a_local, &b_local );
|
||||
}
|
||||
|
||||
|
||||
dim_t m_dim_local = bli_obj_length( &c_local );
|
||||
dim_t n_dim_local = bli_obj_width( &c_local );
|
||||
dim_t k_dim_local = bli_obj_width( &a_local );
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
// Regression observed in sgemm native path in cases where m >= 4 * n
|
||||
// after BLIS_THREAD_RATIO_M updated from 2 to 1 as part of commit
|
||||
// 11dfc176a3c422729f453f6c23204cf023e9954d. Temporary workaround for
|
||||
// the issue.
|
||||
if( bli_obj_is_float( &c_local ) &&
|
||||
( n_dim_local >= 1024 ) &&
|
||||
( k_dim_local >= 1024 ) &&
|
||||
( m_dim_local >= ( 4 * n_dim_local ) ) )
|
||||
{
|
||||
m_dim_local *= 2;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Parse and interpret the contents of the rntm_t object to properly
|
||||
// set the ways of parallelism for each loop, and then make any
|
||||
// additional modifications necessary for the current operation.
|
||||
@@ -181,9 +198,9 @@ void bli_gemm_front
|
||||
(
|
||||
BLIS_GEMM,
|
||||
BLIS_LEFT, // ignored for gemm/hemm/symm
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
m_dim_local,
|
||||
n_dim_local,
|
||||
k_dim_local,
|
||||
rntm
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user