Fix a problem where uninitialized registers are used as source operands of vhaddpd in the Mx1 gemmsup kernels for Haswell.

The fix is to use the same (valid) source register twice in the horizontal addition.

Change-Id: I96ed39e289aaeeb44be9117074b32bd8d4c19de6
This commit is contained in:
Devin Matthews
2021-09-16 10:59:37 -05:00
committed by Nallani Bhaskar
parent faff30b46a
commit ea163fc23b
2 changed files with 320 additions and 334 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1339,20 +1339,6 @@ void bli_dgemmsup_rd_haswell_asm_1x4
vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )
vhaddpd( ymm8, ymm5, ymm0 )
vextractf128(imm(1), ymm0, xmm1 )
vaddpd( xmm0, xmm1, xmm0 )
vhaddpd( ymm14, ymm11, ymm2 )
vextractf128(imm(1), ymm2, xmm1 )
vaddpd( xmm2, xmm1, xmm2 )
vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )
// xmm4[0:3] = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
//mov(var(rs_c), rdi) // load rs_c
//lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(float)