Reduced number of threads in dgemm for small dimensions

- The number of threads is reduced to 1 when the matrix
  dimensions are very small.
- Removed the "uninitialized xmm register" compilation warning in the small trsm kernels

Change-Id: I23262fb82729af5b98ded5d36f5eed45d5255d5b
This commit is contained in:
Nallani Bhaskar
2021-12-15 15:11:08 +05:30
parent 8201bcfdaf
commit c2df5eac1c
2 changed files with 7 additions and 0 deletions

View File

@@ -574,6 +574,10 @@ void bli_nthreads_optimum(
if(n < 15) n_threads_ideal = 1;
else n_threads_ideal = 4;
}
else if( ( m < 34) && ( n < 34) && ( k < 68) )
{
/* All three problem dimensions are very small: use a single thread.
   The third test is on n; the earlier form repeated ( m < 34) and so
   never looked at n, sending a small-m/small-k problem with an
   arbitrarily large n down the single-thread path.
   NOTE(review): the n bound of 34 is assumed to mirror the m bound -
   confirm against the tuning data. */
n_threads_ideal = 1;
}
else
{
if(n < 20) n_threads_ideal = 1;

View File

@@ -2847,6 +2847,7 @@ BLIS_INLINE err_t dtrsm_XAltB_ref
#define BLIS_PRE_STRSM_SMALL_3N_2M(AlphaVal,b11,cs_b)\
ymm15 = _mm256_broadcast_ss((float const *)&AlphaVal); /*register to hold alpha*/\
\
xmm5 = _mm_setzero_ps();\
xmm5 = _mm_loadl_pi(xmm5,(__m64*)(b11));\
ymm6 = _mm256_insertf128_ps(ymm0, xmm5, 0);\
ymm3 = _mm256_fmsub_ps(ymm6, ymm15, ymm3);\
@@ -3009,6 +3010,7 @@ BLIS_INLINE err_t dtrsm_XAltB_ref
#define BLIS_PRE_STRSM_SMALL_2N_2M(AlphaVal,b11,cs_b)\
ymm15 = _mm256_broadcast_ss((float const *)&AlphaVal); /*register to hold alpha*/\
\
xmm5 = _mm_setzero_ps();\
xmm5 = _mm_loadl_pi(xmm5,(__m64*)(b11));\
ymm6 = _mm256_insertf128_ps(ymm0, xmm5, 0);\
ymm3 = _mm256_fmsub_ps(ymm6, ymm15, ymm3);\
@@ -3116,6 +3118,7 @@ BLIS_INLINE err_t dtrsm_XAltB_ref
#define BLIS_PRE_STRSM_SMALL_1N_2M(AlphaVal,b11,cs_b)\
ymm15 = _mm256_broadcast_ss((float const *)&AlphaVal); /*register to hold alpha*/\
\
xmm5 = _mm_setzero_ps();\
xmm5 = _mm_loadl_pi(xmm5,(__m64*)(b11));\
ymm6 = _mm256_insertf128_ps(ymm0, xmm5, 0);\
ymm3 = _mm256_fmsub_ps(ymm6, ymm15, ymm3);