mirror of
https://github.com/amd/blis.git
synced 2026-05-12 01:59:59 +00:00
Add vzeroupper to Haswell microkernels. (#524)
Details:
- Added vzeroupper instruction to the end of all 'gemm' and 'gemmtrsm' microkernels so as to avoid a performance penalty when mixing AVX and SSE instructions. These vzeroupper instructions were once part of the haswell kernels, but were inadvertently removed during a source code shuffle some time ago when we were managing duplicate 'haswell' and 'zen' kernel sets. Thanks to Devin Matthews for tracking this down and re-inserting the missing instructions.

Change-Id: I418fea9fed27ba3ad7d395cf96d1be507955d8e9
This commit is contained in:
committed by
Dipal M Zambare
parent
2a81437bd8
commit
76fbf1233d
@@ -870,7 +870,7 @@ void bli_sgemm_haswell_asm_6x16
|
||||
|
||||
label(.SDONE)
|
||||
|
||||
|
||||
vzeroupper()
|
||||
|
||||
end_asm(
|
||||
: // output operands (none)
|
||||
@@ -1624,6 +1624,7 @@ void bli_dgemm_haswell_asm_6x8
|
||||
|
||||
|
||||
label(.DDONE)
|
||||
|
||||
vzeroupper()
|
||||
|
||||
|
||||
@@ -2158,7 +2159,7 @@ void bli_cgemm_haswell_asm_3x8
|
||||
|
||||
label(.CDONE)
|
||||
|
||||
|
||||
vzeroupper()
|
||||
|
||||
end_asm(
|
||||
: // output operands (none)
|
||||
@@ -2758,7 +2759,7 @@ void bli_zgemm_haswell_asm_3x4
|
||||
|
||||
label(.ZDONE)
|
||||
|
||||
|
||||
vzeroupper()
|
||||
|
||||
end_asm(
|
||||
: // output operands (none)
|
||||
|
||||
Reference in New Issue
Block a user