mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Fixed out-of-bounds bug in sup s6x16m haswell kernel.
Details:
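- Fixed another out-of-bounds read access bug in the haswell sup
assembly kernels. This bug is similar to the one fixed in 17b0caa
and affects bli_sgemmsup_rv_haswell_asm_6x2m(). In the .SROWSTORED
path, vfmadd231ps was given an xmm-width memory operand and so read
16 bytes per row, although a row of this 6x2 kernel presumably holds
only two floats (8 bytes). Each row is now loaded with vmovsd into
xmm0 first, and the FMA operates on registers only. Thanks to Madeesh
Kannan for reporting this bug (and a suitable fix) in #635.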
- CREDITS file update.
Change-Id: I10ccf4d4f471d93e8c8cc4df422c686438fb04e9
This commit is contained in:
committed by
Nallani Bhaskar
parent
4b1663213c
commit
faff30b46a
1
CREDITS
1
CREDITS
@@ -46,6 +46,7 @@ but many others have contributed code and feedback, including
|
||||
Matthew Honnibal @honnibal
|
||||
Stefan Husmann @stefanhusmann
|
||||
Francisco Igual @figual (Universidad Complutense de Madrid)
|
||||
Madeesh Kannan @shadeMe
|
||||
Tony Kelman @tkelman
|
||||
Lee Killough @leekillough (Cray)
|
||||
Mike Kistler @mkistler (IBM, Austin Research Laboratory)
|
||||
|
||||
@@ -4475,34 +4475,39 @@ void bli_sgemmsup_rv_haswell_asm_6x2m
|
||||
|
||||
|
||||
label(.SROWSTORED)
|
||||
|
||||
|
||||
vfmadd231ps(mem(rcx, 0*32), xmm3, xmm4)
|
||||
|
||||
vmovsd(mem(rcx, 0*32), xmm0)
|
||||
vfmadd231ps(xmm0, xmm3, xmm4)
|
||||
vmovsd(xmm4, mem(rcx, 0*32))
|
||||
add(rdi, rcx)
|
||||
|
||||
|
||||
vfmadd231ps(mem(rcx, 0*32), xmm3, xmm6)
|
||||
|
||||
|
||||
vmovsd(mem(rcx, 0*32), xmm0)
|
||||
vfmadd231ps(xmm0, xmm3, xmm6)
|
||||
vmovsd(xmm6, mem(rcx, 0*32))
|
||||
add(rdi, rcx)
|
||||
|
||||
|
||||
vfmadd231ps(mem(rcx, 0*32), xmm3, xmm8)
|
||||
|
||||
|
||||
vmovsd(mem(rcx, 0*32), xmm0)
|
||||
vfmadd231ps(xmm0, xmm3, xmm8)
|
||||
vmovsd(xmm8, mem(rcx, 0*32))
|
||||
add(rdi, rcx)
|
||||
|
||||
|
||||
vfmadd231ps(mem(rcx, 0*32), xmm3, xmm10)
|
||||
|
||||
|
||||
vmovsd(mem(rcx, 0*32), xmm0)
|
||||
vfmadd231ps(xmm0, xmm3, xmm10)
|
||||
vmovsd(xmm10, mem(rcx, 0*32))
|
||||
add(rdi, rcx)
|
||||
|
||||
|
||||
vfmadd231ps(mem(rcx, 0*32), xmm3, xmm12)
|
||||
|
||||
|
||||
vmovsd(mem(rcx, 0*32), xmm0)
|
||||
vfmadd231ps(xmm0, xmm3, xmm12)
|
||||
vmovsd(xmm12, mem(rcx, 0*32))
|
||||
add(rdi, rcx)
|
||||
|
||||
|
||||
vfmadd231ps(mem(rcx, 0*32), xmm3, xmm14)
|
||||
|
||||
|
||||
vmovsd(mem(rcx, 0*32), xmm0)
|
||||
vfmadd231ps(xmm0, xmm3, xmm14)
|
||||
vmovsd(xmm14, mem(rcx, 0*32))
|
||||
//add(rdi, rcx)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user