mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Merge pull request #133 from devinamatthews/haswell-packdim
Fix prefetching in haswell ukernel
This commit is contained in:
@@ -734,6 +734,8 @@ void bli_dgemm_asm_6x8
 "vmovaps -1 * 32(%%rbx), %%ymm1 \n\t"
 " \n\t"
 " \n\t" // iteration 1
+"prefetcht0 72 * 8(%%rax) \n\t"
+" \n\t"
 "vbroadcastsd 6 * 8(%%rax), %%ymm2 \n\t"
 "vbroadcastsd 7 * 8(%%rax), %%ymm3 \n\t"
 "vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
@@ -759,7 +761,7 @@ void bli_dgemm_asm_6x8
 "vmovaps 1 * 32(%%rbx), %%ymm1 \n\t"
 " \n\t"
 " \n\t" // iteration 2
-"prefetcht0 76 * 8(%%rax) \n\t"
+"prefetcht0 80 * 8(%%rax) \n\t"
 " \n\t"
 "vbroadcastsd 12 * 8(%%rax), %%ymm2 \n\t"
 "vbroadcastsd 13 * 8(%%rax), %%ymm3 \n\t"
Reference in New Issue
Block a user