From 03106d650e4030d4c9831683448376f92fc52d41 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 11 Oct 2013 10:40:38 -0500 Subject: [PATCH] Fixed minor perf bug in gemm_ker_var2. Details: - Fixed a minor performance bug in bli_gemm_ker_var2.c (and the experimental bli_gemm_ker_var5.c) whereby the addresses for a_next and b_next are not computed correctly (i.e., do not wrap around) at the edge cases. Thanks to Tze Meng for helping me identify this bug. --- frame/3/gemm/bli_gemm_ker_var2.c | 4 ++-- frame/3/gemm/bli_gemm_ker_var5.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index fadffdfa7..8339e9ae5 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -259,11 +259,11 @@ void PASTEMAC(ch,varname)( \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + rstep_a; \ - if ( i == m_iter - 1 && m_left == 0 ) \ + if ( i == m_iter - 1 ) \ { \ a2 = a_cast; \ b2 = b1 + cstep_b; \ - if ( j == n_iter - 1 && n_left == 0 ) \ + if ( j == n_iter - 1 ) \ b2 = b_cast; \ } \ \ diff --git a/frame/3/gemm/bli_gemm_ker_var5.c b/frame/3/gemm/bli_gemm_ker_var5.c index b737f0241..6845ced93 100644 --- a/frame/3/gemm/bli_gemm_ker_var5.c +++ b/frame/3/gemm/bli_gemm_ker_var5.c @@ -256,7 +256,7 @@ void PASTEMAC(ch,varname)( \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + rstep_a; \ - if ( i == m_iter - 1 && m_left == 0 ) \ + if ( i == m_iter - 1 ) \ { \ a2 = a_cast; \ } \