Merge pull request #150 from devinamatthews/vzeroupper

Add vzeroupper to Intel AVX kernels.
This commit is contained in:
Devin Matthews
2017-08-15 15:17:21 -05:00
committed by GitHub
5 changed files with 33 additions and 1 deletion

View File

@@ -634,6 +634,8 @@ void bli_sgemm_asm_24x4
" \n\t"
" \n\t"
".SDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -1253,6 +1255,8 @@ void bli_dgemm_asm_12x4
" \n\t"
" \n\t"
".DDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)

View File

@@ -600,6 +600,8 @@ void bli_sgemm_asm_4x24
" \n\t"
" \n\t"
".SDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -1186,6 +1188,8 @@ void bli_dgemm_asm_4x12
" \n\t"
" \n\t"
".DDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)

View File

@@ -595,6 +595,8 @@ void bli_sgemm_asm_6x16
" \n\t"
" \n\t"
".SDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -1175,6 +1177,8 @@ void bli_dgemm_asm_6x8
" \n\t"
" \n\t"
".DDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -1702,6 +1706,8 @@ void bli_cgemm_asm_3x8
" \n\t"
" \n\t"
".CDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -2228,6 +2234,8 @@ void bli_zgemm_asm_3x4
" \n\t"
" \n\t"
".ZDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)

View File

@@ -596,6 +596,8 @@ void bli_sgemm_asm_16x6
" \n\t"
" \n\t"
".SDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -1176,6 +1178,8 @@ void bli_dgemm_asm_8x6
" \n\t"
" \n\t"
".DDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -1703,6 +1707,8 @@ void bli_cgemm_asm_8x3
" \n\t"
" \n\t"
".CDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -2229,6 +2235,8 @@ void bli_zgemm_asm_4x3
" \n\t"
" \n\t"
".ZDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)

View File

@@ -991,7 +991,9 @@ void bli_sgemm_asm_8x8
" \n\t"
" \n\t"
".SDONE: \n\t"
" \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
: // input operands
@@ -1658,6 +1660,8 @@ void bli_dgemm_asm_8x4
" \n\t"
" \n\t"
".DDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -2611,6 +2615,8 @@ void bli_cgemm_asm_8x4
" \n\t"
" \n\t"
".CDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)
@@ -3453,6 +3459,8 @@ void bli_zgemm_asm_4x4
" \n\t"
" \n\t"
".ZDONE: \n\t"
" \n\t"
"vzeroupper \n\t"
" \n\t"
: // output operands (none)