From 881b05ecd40c7bc0422d3479a02a28b1cb48383f Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Thu, 21 Nov 2019 16:34:27 -0600 Subject: [PATCH] Fixed blastest failure for 'generic' subconfig. Details: - Fixed a subtle and complicated bug that only manifested via the BLAS test drivers in the generic subconfiguration, and possibly any other subconfiguration that did not register complex-domain gemm ukernels, or registered ONLY real-domain ukernels as row-preferential. This is a long story, but it boils down to an exception to the "transpose the operation to bring storage of C into agreement with ukernel pref" optimization in bli_hemm_front.c and bli_symm_front.c sabotaging the proper functioning of the 1m method, but only when the imaginary component of beta is zero. The same exception is also disabled in bli_trmm_front.c. See the comments in issue #342 for more details. Thanks to Dave Love for identifying the commit in which this bug was introduced, and for other feedback related to this bug. --- frame/3/hemm/bli_hemm_front.c | 3 ++- frame/3/symm/bli_symm_front.c | 3 ++- frame/3/trmm/bli_trmm_front.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/frame/3/hemm/bli_hemm_front.c b/frame/3/hemm/bli_hemm_front.c index 719cab62d..14759e515 100644 --- a/frame/3/hemm/bli_hemm_front.c +++ b/frame/3/hemm/bli_hemm_front.c @@ -111,7 +111,8 @@ void bli_hemm_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( !bli_obj_is_1x1( &c_local ) ) + //if ( !bli_obj_is_1x1( &c_local ) ) // NOTE: This conditional should NOT + // be enabled. See issue #342 comments. 
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side ); diff --git a/frame/3/symm/bli_symm_front.c b/frame/3/symm/bli_symm_front.c index 4e267fa3d..4d0727202 100644 --- a/frame/3/symm/bli_symm_front.c +++ b/frame/3/symm/bli_symm_front.c @@ -111,7 +111,8 @@ void bli_symm_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( !bli_obj_is_1x1( &c_local ) ) + //if ( !bli_obj_is_1x1( &c_local ) ) // NOTE: This conditional should NOT + // be enabled. See issue #342 comments. if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side ); diff --git a/frame/3/trmm/bli_trmm_front.c b/frame/3/trmm/bli_trmm_front.c index 528e016e9..6b71a4998 100644 --- a/frame/3/trmm/bli_trmm_front.c +++ b/frame/3/trmm/bli_trmm_front.c @@ -129,7 +129,8 @@ void bli_trmm_front // micro-kernel to access elements of C in its preferred manner. // NOTE: We disable the optimization for 1x1 matrices since the concept // of row- vs. column storage breaks down. - if ( !bli_obj_is_1x1( &c_local ) ) + //if ( !bli_obj_is_1x1( &c_local ) ) // NOTE: This conditional should NOT + // be enabled. See issue #342 comments. if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side );