From faff30b46a6c3573a33912e73f555a1f625e3394 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 26 Jul 2022 17:29:32 -0500 Subject: [PATCH] Fixed out-of-bounds bug in sup s6x16m haswell kernel. Details: - Fixed another out-of-bounds read access bug in the haswell sup assembly kernels. This bug is similar to the one fixed in 17b0caa and affects bli_sgemmsup_rv_haswell_asm_6x2m(). Thanks to Madeesh Kannan for reporting this bug (and a suitable fix) in #635. - CREDITS file update. Change-Id: I10ccf4d4f471d93e8c8cc4df422c686438fb04e9 --- CREDITS | 1 + .../3/sup/bli_gemmsup_rv_haswell_asm_s6x16m.c | 41 +++++++++++-------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/CREDITS b/CREDITS index d68bcca01..fd0bcb5b3 100644 --- a/CREDITS +++ b/CREDITS @@ -46,6 +46,7 @@ but many others have contributed code and feedback, including Matthew Honnibal @honnibal Stefan Husmann @stefanhusmann Francisco Igual @figual (Universidad Complutense de Madrid) + Madeesh Kannan @shadeMe Tony Kelman @tkelman Lee Killough @leekillough (Cray) Mike Kistler @mkistler (IBM, Austin Research Laboratory) diff --git a/kernels/haswell/3/sup/bli_gemmsup_rv_haswell_asm_s6x16m.c b/kernels/haswell/3/sup/bli_gemmsup_rv_haswell_asm_s6x16m.c index 426e5157e..877e636b8 100644 --- a/kernels/haswell/3/sup/bli_gemmsup_rv_haswell_asm_s6x16m.c +++ b/kernels/haswell/3/sup/bli_gemmsup_rv_haswell_asm_s6x16m.c @@ -4475,34 +4475,39 @@ void bli_sgemmsup_rv_haswell_asm_6x2m label(.SROWSTORED) - - - vfmadd231ps(mem(rcx, 0*32), xmm3, xmm4) + + vmovsd(mem(rcx, 0*32), xmm0) + vfmadd231ps(xmm0, xmm3, xmm4) vmovsd(xmm4, mem(rcx, 0*32)) add(rdi, rcx) - - - vfmadd231ps(mem(rcx, 0*32), xmm3, xmm6) + + + vmovsd(mem(rcx, 0*32), xmm0) + vfmadd231ps(xmm0, xmm3, xmm6) vmovsd(xmm6, mem(rcx, 0*32)) add(rdi, rcx) - - - vfmadd231ps(mem(rcx, 0*32), xmm3, xmm8) + + + vmovsd(mem(rcx, 0*32), xmm0) + vfmadd231ps(xmm0, xmm3, xmm8) vmovsd(xmm8, mem(rcx, 0*32)) add(rdi, rcx) - - - vfmadd231ps(mem(rcx, 0*32), xmm3, xmm10) + + + vmovsd(mem(rcx, 0*32), xmm0) + vfmadd231ps(xmm0, xmm3, xmm10) vmovsd(xmm10, mem(rcx, 0*32)) add(rdi, rcx) - - - vfmadd231ps(mem(rcx, 0*32), xmm3, xmm12) + + + vmovsd(mem(rcx, 0*32), xmm0) + vfmadd231ps(xmm0, xmm3, xmm12) vmovsd(xmm12, mem(rcx, 0*32)) add(rdi, rcx) - - - vfmadd231ps(mem(rcx, 0*32), xmm3, xmm14) + + + vmovsd(mem(rcx, 0*32), xmm0) + vfmadd231ps(xmm0, xmm3, xmm14) vmovsd(xmm14, mem(rcx, 0*32)) //add(rdi, rcx)