From 7dc78b49f97e6b3cd6d72fcdc588ace534d0e700 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Tue, 15 Aug 2017 10:02:25 -0500 Subject: [PATCH] Add vzeroupper to Intel AVX kernels. --- kernels/x86_64/haswell/3/bli_gemm_asm_d12x4.c | 4 ++++ kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c | 4 ++++ kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c | 8 ++++++++ kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c | 8 ++++++++ kernels/x86_64/sandybridge/3/bli_gemm_asm_d8x4.c | 10 +++++++++- 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/kernels/x86_64/haswell/3/bli_gemm_asm_d12x4.c b/kernels/x86_64/haswell/3/bli_gemm_asm_d12x4.c index 5bc2dd4ba..2088e030a 100644 --- a/kernels/x86_64/haswell/3/bli_gemm_asm_d12x4.c +++ b/kernels/x86_64/haswell/3/bli_gemm_asm_d12x4.c @@ -634,6 +634,8 @@ void bli_sgemm_asm_24x4 " \n\t" " \n\t" ".SDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -1253,6 +1255,8 @@ void bli_dgemm_asm_12x4 " \n\t" " \n\t" ".DDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) diff --git a/kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c b/kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c index c92612b07..5eb0f0732 100644 --- a/kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c +++ b/kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c @@ -600,6 +600,8 @@ void bli_sgemm_asm_4x24 " \n\t" " \n\t" ".SDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -1186,6 +1188,8 @@ void bli_dgemm_asm_4x12 " \n\t" " \n\t" ".DDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) diff --git a/kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c b/kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c index 5bd2d92e5..78b294053 100644 --- a/kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c +++ b/kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c @@ -595,6 +595,8 @@ void bli_sgemm_asm_6x16 " \n\t" " \n\t" ".SDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -1175,6 +1177,8 @@ void bli_dgemm_asm_6x8 " \n\t" " \n\t" ".DDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -1702,6 +1706,8 @@ void bli_cgemm_asm_3x8 " \n\t" " \n\t" ".CDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -2228,6 +2234,8 @@ void bli_zgemm_asm_3x4 " \n\t" " \n\t" ".ZDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) diff --git a/kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c b/kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c index f173947c3..9796e27ef 100644 --- a/kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c +++ b/kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c @@ -596,6 +596,8 @@ void bli_sgemm_asm_16x6 " \n\t" " \n\t" ".SDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -1176,6 +1178,8 @@ void bli_dgemm_asm_8x6 " \n\t" " \n\t" ".DDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -1703,6 +1707,8 @@ void bli_cgemm_asm_8x3 " \n\t" " \n\t" ".CDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -2229,6 +2235,8 @@ void bli_zgemm_asm_4x3 " \n\t" " \n\t" ".ZDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) diff --git a/kernels/x86_64/sandybridge/3/bli_gemm_asm_d8x4.c b/kernels/x86_64/sandybridge/3/bli_gemm_asm_d8x4.c index f8db398ca..f19f053fc 100644 --- a/kernels/x86_64/sandybridge/3/bli_gemm_asm_d8x4.c +++ b/kernels/x86_64/sandybridge/3/bli_gemm_asm_d8x4.c @@ -991,7 +991,9 @@ void bli_sgemm_asm_8x8 " \n\t" " \n\t" ".SDONE: \n\t" - " \n\t" + " \n\t" + "vzeroupper \n\t" + " \n\t" : // output operands (none) : // input operands @@ -1658,6 +1660,8 @@ void bli_dgemm_asm_8x4 " \n\t" " \n\t" ".DDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -2611,6 +2615,8 @@ void bli_cgemm_asm_8x4 " \n\t" " \n\t" ".CDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none) @@ -3453,6 +3459,8 @@ void bli_zgemm_asm_4x4 " \n\t" " \n\t" ".ZDONE: \n\t" + " \n\t" + "vzeroupper \n\t" " \n\t" : // output operands (none)