From 54988e8dca44475610bcaee5a7bc1c40e8921402 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 8 Apr 2013 19:08:43 -0500 Subject: [PATCH] Fixed a performance bug in trsm. Details: - Fixed a bug in the reference implementations of the gemmtrsm wrappers (bli_gemmtrsm_l_ref_mxn.c and bli_gemmtrsm_u_ref_mxn.c) whereby the reference gemm microkernel was hard-coded, and thus always called, even when GEMM_UKERNEL was defined to point to an optimized microkernel. This manifested as artificially low trsm performance for all problem sizes, but especially for small problem sizes as it only affected blocks of A that intersected the diagonal. Thanks to Mike Kistler of IBM for helping me find this bug. --- frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c | 2 +- frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c b/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c index 070267a3a..33bddb233 100644 --- a/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c +++ b/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c @@ -69,5 +69,5 @@ void PASTEMAC(ch,varname)( \ c, rs_c, cs_c ); \ } -INSERT_GENTFUNC_BASIC2( gemmtrsm_l_ref_mxn, gemm_ref_mxn, trsm_l_ref_mxn ) +INSERT_GENTFUNC_BASIC2( gemmtrsm_l_ref_mxn, GEMM_UKERNEL, TRSM_L_UKERNEL ) diff --git a/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c b/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c index 2237649d9..8b33d4787 100644 --- a/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c +++ b/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c @@ -69,5 +69,5 @@ void PASTEMAC(ch,varname)( \ c, rs_c, cs_c ); \ } -INSERT_GENTFUNC_BASIC2( gemmtrsm_u_ref_mxn, gemm_ref_mxn, trsm_u_ref_mxn ) +INSERT_GENTFUNC_BASIC2( gemmtrsm_u_ref_mxn, GEMM_UKERNEL, TRSM_U_UKERNEL )