mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Fix a problem where uninitialized registers were used as source operands of vhaddpd in the Mx1 gemmsup kernels for Haswell.
The fix is to use the same (valid) source register twice in the horizontal addition. Change-Id: I96ed39e289aaeeb44be9117074b32bd8d4c19de6
This commit is contained in:
committed by
Nallani Bhaskar
parent
faff30b46a
commit
ea163fc23b
File diff suppressed because it is too large
Load Diff
@@ -1339,20 +1339,6 @@ void bli_dgemmsup_rd_haswell_asm_1x4
|
||||
vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )
|
||||
|
||||
|
||||
vhaddpd( ymm8, ymm5, ymm0 )
|
||||
vextractf128(imm(1), ymm0, xmm1 )
|
||||
vaddpd( xmm0, xmm1, xmm0 )
|
||||
|
||||
vhaddpd( ymm14, ymm11, ymm2 )
|
||||
vextractf128(imm(1), ymm2, xmm1 )
|
||||
vaddpd( xmm2, xmm1, xmm2 )
|
||||
|
||||
vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )
|
||||
|
||||
// ymm4[0:3] = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
|
||||
|
||||
|
||||
|
||||
//mov(var(rs_c), rdi) // load rs_c
|
||||
//lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(float)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user