mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Merge branch 'dev' of github.com:flame/blis into dev
This commit is contained in:
@@ -109,7 +109,7 @@ void bli_sgemm_penryn_asm_8x4
|
||||
|
||||
label(.SLOOPKITER) // MAIN LOOP
|
||||
|
||||
prefetch(0, mem(4*35+1)*8(rax))
|
||||
prefetch(0, mem(rax, (4*35+1)*8))
|
||||
|
||||
addps(xmm6, xmm10) // iteration 0
|
||||
addps(xmm3, xmm14)
|
||||
@@ -917,8 +917,8 @@ void bli_dgemm_penryn_asm_4x4
|
||||
|
||||
label(.DLOOPKITER) // MAIN LOOP
|
||||
|
||||
prefetch(0, mem(4*35+1)*8(rax))
|
||||
//prefetch(0, mem(8*97+4)*8(rax))
|
||||
prefetch(0, mem(rax, (4*35+1)*8))
|
||||
//prefetch(0, mem(rax, (8*97+4)*8))
|
||||
|
||||
//prefetch(0, mem(r11, 67*4*8)) // prefetch a_next[0]
|
||||
|
||||
@@ -985,8 +985,8 @@ void bli_dgemm_penryn_asm_4x4
|
||||
movaps(mem(rax, -3*16), xmm1)
|
||||
|
||||
|
||||
prefetch(0, mem(4*37+1)*8(rax))
|
||||
//prefetch(0, mem(8*97+12)*8(rax))
|
||||
prefetch(0, mem(rax, (4*37+1)*8))
|
||||
//prefetch(0, mem(rax, (8*97+12)*8))
|
||||
|
||||
//prefetch(0, mem(r11, 69*4*8)) // prefetch a_next[8]
|
||||
//sub(imm(-4*4*8), r11) // a_next += 4*4 (unroll x mr)
|
||||
|
||||
@@ -125,7 +125,7 @@ void bli_dgemmtrsm_l_penryn_asm_4x4
|
||||
label(.LOOPKITER) // MAIN LOOP
|
||||
|
||||
//prefetch(0, mem(rax, 1264))
|
||||
prefetch(0, mem(4*35+1)*8(rax))
|
||||
prefetch(0, mem(rax, (4*35+1)*8))
|
||||
|
||||
addpd(xmm3, xmm11) // iteration 0
|
||||
movaps(mem(rbx, -7*16), xmm3)
|
||||
@@ -189,7 +189,7 @@ void bli_dgemmtrsm_l_penryn_asm_4x4
|
||||
movaps(mem(rax, -3*16), xmm1)
|
||||
|
||||
//prefetch(0, mem(rax, 1328))
|
||||
prefetch(0, mem(4*37+1)*8(rax))
|
||||
prefetch(0, mem(rax, (4*37+1)*8))
|
||||
|
||||
addpd(xmm3, xmm11) // iteration 2
|
||||
movaps(mem(rbx, -3*16), xmm3)
|
||||
|
||||
Reference in New Issue
Block a user