From 8eb264f78b7f4b6e6df8d3fa47360ea411344aed Mon Sep 17 00:00:00 2001 From: Meghana Vankadari Date: Mon, 16 Dec 2019 14:27:41 +0530 Subject: [PATCH] Change in threshold condition for trsm_small kernels Change-Id: I396e246b1639d300fcb94bdf7e5fa8bc8c87e994 --- config/zen2/bli_family_zen2.h | 24 ++++++++++-- kernels/zen/3/bli_trsm_small.c | 68 +++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 34 deletions(-) diff --git a/config/zen2/bli_family_zen2.h b/config/zen2/bli_family_zen2.h index def0e21f9..45088cfce 100644 --- a/config/zen2/bli_family_zen2.h +++ b/config/zen2/bli_family_zen2.h @@ -60,11 +60,27 @@ #define BLIS_ENABLE_SMALL_MATRIX_ROME #define BLIS_SMALL_MATRIX_THRES_ROME 400 -#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME 120 -#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 60 -#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 80 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M 40 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M 1000 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N 10 -#define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M 5 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N 130 + +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME 120 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M 10 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N 1200 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M 30 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N 280 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N 100 + +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME 110 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N 30 + +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME 120 +#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME_COL_PANEL_N 50 // 
When running HPL with pure MPI without DGEMM threading (Single-threaded // BLIS), defining this macro as 1 yields better performance. diff --git a/kernels/zen/3/bli_trsm_small.c b/kernels/zen/3/bli_trsm_small.c index 29c99f60c..906389d34 100644 --- a/kernels/zen/3/bli_trsm_small.c +++ b/kernels/zen/3/bli_trsm_small.c @@ -953,10 +953,11 @@ static err_t bli_dtrsm_small_AlXB( dim_t n = bli_obj_width(b); // number of columns of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME) - { + if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME) + || (m> D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N) + || (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M && n<D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES) { @@ -3008,10 +3009,11 @@ static err_t bli_dtrsm_small_AlXB_unitDiag( dim_t n = bli_obj_width(b); // number of columns of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME) - { + if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME) + || (m> D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N) + || (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M && n<D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES) { @@ -4683,10 +4685,10 @@ static err_t bli_dtrsm_small_XAuB( dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO) - { + if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME) + || (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else 
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO) { @@ -6410,10 +6412,10 @@ static err_t bli_dtrsm_small_XAuB_unitDiag( dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO) - { + if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME) + || (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO) { @@ -7864,10 +7866,13 @@ static err_t bli_dtrsm_small_XAltB( dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) - { + if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N) + || (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N) + || (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME) + || (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME) + || (m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { @@ -9615,10 +9620,13 @@ static err_t bli_dtrsm_small_XAltB_unitDiag( dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) - { + if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M && 
n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N) + || (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N) + || (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME) + || (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME) + || (m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { @@ -11082,10 +11090,10 @@ static err_t bli_dtrsm_small_XAlB( #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) - { + if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME) + ||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { @@ -12331,10 +12339,10 @@ static err_t bli_dtrsm_small_XAlB_unitDiag( #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) - { + if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME) + ||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { @@ -13325,10 +13333,10 @@ static err_t bli_dtrsm_small_XAutB( dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) - { + if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME) + ||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > 
D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME_COL_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { @@ -14594,10 +14602,10 @@ static err_t bli_dtrsm_small_XAutB_unitDiag( dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME - if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) - { + if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME) + ||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME_COL_PANEL_N) + ) return BLIS_NOT_YET_IMPLEMENTED; - } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) {