Merge pull request #133 from devinamatthews/haswell-packdim

Fix prefetching in the Haswell microkernel
This commit is contained in:
Devin Matthews
2017-07-20 10:58:39 -05:00
committed by GitHub

View File

@@ -734,6 +734,8 @@ void bli_dgemm_asm_6x8
"vmovaps -1 * 32(%%rbx), %%ymm1 \n\t"
" \n\t"
" \n\t" // iteration 1
+"prefetcht0 72 * 8(%%rax) \n\t"
+" \n\t"
"vbroadcastsd 6 * 8(%%rax), %%ymm2 \n\t"
"vbroadcastsd 7 * 8(%%rax), %%ymm3 \n\t"
"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
@@ -759,7 +761,7 @@ void bli_dgemm_asm_6x8
"vmovaps 1 * 32(%%rbx), %%ymm1 \n\t"
" \n\t"
" \n\t" // iteration 2
-"prefetcht0 76 * 8(%%rax) \n\t"
+"prefetcht0 80 * 8(%%rax) \n\t"
" \n\t"
"vbroadcastsd 12 * 8(%%rax), %%ymm2 \n\t"
"vbroadcastsd 13 * 8(%%rax), %%ymm3 \n\t"