mirror of
https://github.com/amd/blis.git
synced 2026-05-13 02:25:39 +00:00
Reduced number of threads in dgemm for small dimensions
- The number of threads is reduced to 1 when the dimensions are very small. - Removed the uninitialized xmm compilation warning in trsm small. Change-Id: I23262fb82729af5b98ded5d36f5eed45d5255d5b
This commit is contained in:
@@ -574,6 +574,10 @@ void bli_nthreads_optimum(
|
||||
if(n < 15) n_threads_ideal = 1;
|
||||
else n_threads_ideal = 4;
|
||||
}
|
||||
else if( ( m < 34) && (k < 68) && ( m < 34))
|
||||
{
|
||||
n_threads_ideal = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(n < 20) n_threads_ideal = 1;
|
||||
|
||||
@@ -2847,6 +2847,7 @@ BLIS_INLINE err_t dtrsm_XAltB_ref
|
||||
#define BLIS_PRE_STRSM_SMALL_3N_2M(AlphaVal,b11,cs_b)\
|
||||
ymm15 = _mm256_broadcast_ss((float const *)&AlphaVal); /*register to hold alpha*/\
|
||||
\
|
||||
xmm5 = _mm_setzero_ps();\
|
||||
xmm5 = _mm_loadl_pi(xmm5,(__m64*)(b11));\
|
||||
ymm6 = _mm256_insertf128_ps(ymm0, xmm5, 0);\
|
||||
ymm3 = _mm256_fmsub_ps(ymm6, ymm15, ymm3);\
|
||||
@@ -3009,6 +3010,7 @@ BLIS_INLINE err_t dtrsm_XAltB_ref
|
||||
#define BLIS_PRE_STRSM_SMALL_2N_2M(AlphaVal,b11,cs_b)\
|
||||
ymm15 = _mm256_broadcast_ss((float const *)&AlphaVal); /*register to hold alpha*/\
|
||||
\
|
||||
xmm5 = _mm_setzero_ps();\
|
||||
xmm5 = _mm_loadl_pi(xmm5,(__m64*)(b11));\
|
||||
ymm6 = _mm256_insertf128_ps(ymm0, xmm5, 0);\
|
||||
ymm3 = _mm256_fmsub_ps(ymm6, ymm15, ymm3);\
|
||||
@@ -3116,6 +3118,7 @@ BLIS_INLINE err_t dtrsm_XAltB_ref
|
||||
/*
 * BLIS_PRE_STRSM_SMALL_1N_2M(AlphaVal, b11, cs_b)
 *
 * Statement macro that scales two floats read from b11 by alpha and
 * subtracts the current contents of ymm3:
 *
 *   ymm15 = AlphaVal broadcast to all eight float lanes
 *   xmm5  = { b11[0], b11[1], 0, 0 }  (zeroed first, then the low 64 bits
 *           are loaded, so the upper two lanes hold a defined value)
 *   ymm6  = ymm0 with its lower 128 bits replaced by xmm5
 *   ymm3  = ymm6 * ymm15 - ymm3
 *
 * AlphaVal must be an lvalue (its address is taken). cs_b is not used in
 * the macro body. The macro reads and writes the variables ymm0, ymm3,
 * ymm6, ymm15 and xmm5, which must already be declared in the expanding
 * scope; the upper 128 bits of ymm6 come from ymm0.
 * NOTE(review): presumably the explicit zeroing of xmm5 is what silences
 * the uninitialized-xmm compiler warning - confirm against the commit.
 */
#define BLIS_PRE_STRSM_SMALL_1N_2M(AlphaVal,b11,cs_b)\
|
||||
ymm15 = _mm256_broadcast_ss((float const *)&AlphaVal); /*register to hold alpha*/\
|
||||
\
|
||||
xmm5 = _mm_setzero_ps();\
|
||||
xmm5 = _mm_loadl_pi(xmm5,(__m64*)(b11));\
|
||||
ymm6 = _mm256_insertf128_ps(ymm0, xmm5, 0);\
|
||||
ymm3 = _mm256_fmsub_ps(ymm6, ymm15, ymm3);
|
||||
|
||||
Reference in New Issue
Block a user