mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Fixed copy-paste bugs in edge-case sup kernels.
Details: Fixed bugs in two sup kernels, bli_dgemmsup_rv_haswell_asm_1x6() and bli_dgemmsup_rd_haswell_asm_1x4(), which involved extraneous assembly instructions that were left over from when the kernels were first written. These instructions would cause segmentation faults in some situations where extra memory was not allocated beyond the end of the matrix buffers. Thanks to Kiran Varaganti for reporting these bugs and to Bhaskar Nallani for identifying the cause and solution.
This commit is contained in:
@@ -1297,7 +1297,6 @@ void bli_dgemmsup_rd_haswell_asm_1x4
|
||||
// which would destroy intermediate results.
|
||||
|
||||
vmovsd(mem(rax ), xmm0)
|
||||
vmovsd(mem(rax, r8, 1), xmm1)
|
||||
add(imm(1*8), rax) // a += 1*cs_a = 1*8;
|
||||
|
||||
vmovsd(mem(rbx ), xmm3)
|
||||
|
||||
@@ -2828,7 +2828,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
@@ -2845,7 +2844,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
@@ -2862,7 +2860,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
@@ -2879,7 +2876,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
@@ -2914,7 +2910,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
|
||||
Reference in New Issue
Block a user