From e80a4537846416719c067ae08a53aeda978c572d Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Thu, 18 Sep 2014 10:24:20 -0500 Subject: [PATCH] Fixed bug introduced by bugfix in 25b258d. Details: - We actually need to check the alignment of lda*sizeof(double) and NOT of a+lda because, in the latter case, the misalignments of a and of lda*sizeof(double) could cancel out, so that a+lda appears aligned and the optimized code is still allowed to run when it shouldn't. Thanks to Devin for pointing this out. --- kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c | 2 +- kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c | 2 +- kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c b/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c index da87112a4..e57e29669 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c +++ b/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c @@ -92,7 +92,7 @@ void bli_daxpyf_opt_var1( use_ref = TRUE; } else if ( inca != 1 || incx != 1 || incy != 1 || - bli_is_unaligned_to( a+lda, 16 ) ) + bli_is_unaligned_to( lda*sizeof(double), 16 ) ) { use_ref = TRUE; } diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c b/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c index e5a7fead3..2f907a8f7 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c @@ -116,7 +116,7 @@ void bli_ddotxaxpyf_opt_var1( conj_t conjat, use_ref = TRUE; } else if ( inca != 1 || incw != 1 || incx != 1 || incy != 1 || incz != 1 || - bli_is_unaligned_to( a+lda, 16 ) ) + bli_is_unaligned_to( lda*sizeof(double), 16 ) ) { use_ref = TRUE; } diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c index e09e8fe92..1dab93b5c 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c @@ -104,7 +104,7 @@ void bli_ddotxf_opt_var1( use_ref = 
TRUE; } else if ( inca != 1 || incx != 1 || incy != 1 || - bli_is_unaligned_to( a+lda, 16 ) ) + bli_is_unaligned_to( lda*sizeof(double), 16 ) ) { use_ref = TRUE; }