mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Fix for segmentation crash in dgemmsup kernels
Description: [AMD Internal]: CPUPL-1336 Removed extra/unnecessary loads in dgemmsup kernels that were accessing memory beyond the array boundaries and causing segmentation faults. Kernels: bli_dgemmsup_rd_haswell_asm_1x4, bli_dgemmsup_rv_haswell_asm_1x6. Change-Id: Idaeed36ebd9f13550943394a37e372b8d015b2d3
This commit is contained in:
committed by
Dipal M Zambare
parent
53a33f1afb
commit
99e381b02f
@@ -1297,7 +1297,6 @@ void bli_dgemmsup_rd_haswell_asm_1x4
|
||||
// which would destroy intermediate results.
|
||||
|
||||
vmovsd(mem(rax ), xmm0)
|
||||
vmovsd(mem(rax, r8, 1), xmm1)
|
||||
add(imm(1*8), rax) // a += 1*cs_a = 1*8;
|
||||
|
||||
vmovsd(mem(rbx ), xmm3)
|
||||
|
||||
@@ -2828,7 +2828,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
@@ -2845,7 +2844,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
@@ -2862,7 +2860,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
@@ -2879,7 +2876,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
@@ -2914,7 +2910,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
|
||||
add(r10, rbx) // b += rs_b;
|
||||
|
||||
vbroadcastsd(mem(rax ), ymm2)
|
||||
vbroadcastsd(mem(rax, r8, 1), ymm3)
|
||||
add(r9, rax) // a += cs_a;
|
||||
vfmadd231pd(ymm0, ymm2, ymm4)
|
||||
vfmadd231pd(ymm1, ymm2, ymm5)
|
||||
|
||||
Reference in New Issue
Block a user