Fix for segmentation crash in dgemmsup kernels

Description:

[AMD Internal]: CPUPL-1336

Removed extra/unnecessary loads in the dgemmsup kernels that were
accessing memory beyond the boundaries and causing a segmentation
fault.

Kernels:
bli_dgemmsup_rd_haswell_asm_1x4
bli_dgemmsup_rv_haswell_asm_1x6

Change-Id: Idaeed36ebd9f13550943394a37e372b8d015b2d3
This commit is contained in:
bhaskarn
2020-11-24 20:16:48 +05:30
committed by Dipal M Zambare
parent 53a33f1afb
commit 99e381b02f
2 changed files with 0 additions and 6 deletions

View File

@@ -1297,7 +1297,6 @@ void bli_dgemmsup_rd_haswell_asm_1x4
// which would destroy intermediate results.
vmovsd(mem(rax ), xmm0)
vmovsd(mem(rax, r8, 1), xmm1)
add(imm(1*8), rax) // a += 1*cs_a = 1*8;
vmovsd(mem(rbx ), xmm3)

View File

@@ -2828,7 +2828,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
add(r10, rbx) // b += rs_b;
vbroadcastsd(mem(rax ), ymm2)
vbroadcastsd(mem(rax, r8, 1), ymm3)
add(r9, rax) // a += cs_a;
vfmadd231pd(ymm0, ymm2, ymm4)
vfmadd231pd(ymm1, ymm2, ymm5)
@@ -2845,7 +2844,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
add(r10, rbx) // b += rs_b;
vbroadcastsd(mem(rax ), ymm2)
vbroadcastsd(mem(rax, r8, 1), ymm3)
add(r9, rax) // a += cs_a;
vfmadd231pd(ymm0, ymm2, ymm4)
vfmadd231pd(ymm1, ymm2, ymm5)
@@ -2862,7 +2860,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
add(r10, rbx) // b += rs_b;
vbroadcastsd(mem(rax ), ymm2)
vbroadcastsd(mem(rax, r8, 1), ymm3)
add(r9, rax) // a += cs_a;
vfmadd231pd(ymm0, ymm2, ymm4)
vfmadd231pd(ymm1, ymm2, ymm5)
@@ -2879,7 +2876,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
add(r10, rbx) // b += rs_b;
vbroadcastsd(mem(rax ), ymm2)
vbroadcastsd(mem(rax, r8, 1), ymm3)
add(r9, rax) // a += cs_a;
vfmadd231pd(ymm0, ymm2, ymm4)
vfmadd231pd(ymm1, ymm2, ymm5)
@@ -2914,7 +2910,6 @@ void bli_dgemmsup_rv_haswell_asm_1x6
add(r10, rbx) // b += rs_b;
vbroadcastsd(mem(rax ), ymm2)
vbroadcastsd(mem(rax, r8, 1), ymm3)
add(r9, rax) // a += cs_a;
vfmadd231pd(ymm0, ymm2, ymm4)
vfmadd231pd(ymm1, ymm2, ymm5)