Change in threshold condition for trsm_small kernels

Change-Id: I396e246b1639d300fcb94bdf7e5fa8bc8c87e994
This commit is contained in:
Meghana Vankadari
2019-12-16 14:27:41 +05:30
parent a8af07f68c
commit 8eb264f78b
2 changed files with 58 additions and 34 deletions

View File

@@ -60,11 +60,27 @@
// Selects the Rome branch of the size checks that the bli_dtrsm_small_*
// kernels guard with #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME.
#define BLIS_ENABLE_SMALL_MATRIX_ROME
#define BLIS_SMALL_MATRIX_THRES_ROME 400
// Size thresholds for the double-precision trsm small-matrix kernels.
// Each bli_dtrsm_small_* variant compares the dimensions m and n of B against
// the constants for its case and returns BLIS_NOT_YET_IMPLEMENTED when the
// problem falls outside the range the small kernel accepts.
//
// Generic bound, compared against bli_max(m,n).
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME 120
// NOTE(review): D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME (60 and 80) and
// D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME (150 and 150) each appear twice in
// this run. This looks like the removed and added lines of a diff shown
// together -- confirm that only one definition of each remains in the file.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 60
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150
// AlXB case (used by bli_dtrsm_small_AlXB and its _unitDiag variant).
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 80
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M 40
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M 1000
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N 10
// Upper bound on the integer quotient m/n in the XAuB checks.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22
// XAuB case (used by bli_dtrsm_small_XAuB and its _unitDiag variant).
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M 5
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N 130
// XAltB case (used by bli_dtrsm_small_XAltB and its _unitDiag variant).
// Note that ..._XALTB_ROME_COLUMN_PANEL_N is compared against m in one
// clause of that check and against n in another.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME 120
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M 10
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N 1200
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M 30
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N 280
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N 100
// XAlB case (used by bli_dtrsm_small_XAlB and its _unitDiag variant).
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME 110
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N 30
// XAutB case (used by bli_dtrsm_small_XAutB and its _unitDiag variant).
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME 120
// NOTE(review): the bli_dtrsm_small_XAutB checks compare n against
// D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N (30), not against the
// constant below (50) -- confirm which one is intended.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME_COL_PANEL_N 50
// When running HPL with pure MPI without DGEMM threading (Single-threaded
// BLIS), defining this macro as 1 yields better performance.

View File

@@ -953,10 +953,11 @@ static err_t bli_dtrsm_small_AlXB(
dim_t n = bli_obj_width(b); // number of columns of matrix B
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME)
{
if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME)
|| (m> D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N)
|| (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M && n<D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES)
{
@@ -3008,10 +3009,11 @@ static err_t bli_dtrsm_small_AlXB_unitDiag(
dim_t n = bli_obj_width(b); // number of columns of matrix B
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME)
{
if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME)
|| (m> D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N)
|| (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M && n<D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES)
{
@@ -4683,10 +4685,10 @@ static err_t bli_dtrsm_small_XAuB(
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO)
{
if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME)
|| (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO)
{
@@ -6410,10 +6412,10 @@ static err_t bli_dtrsm_small_XAuB_unitDiag(
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO)
{
if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME)
|| (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO)
{
@@ -7864,10 +7866,13 @@ static err_t bli_dtrsm_small_XAltB(
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
{
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N)
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N)
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME)
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME)
|| (m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
{
@@ -9615,10 +9620,13 @@ static err_t bli_dtrsm_small_XAltB_unitDiag(
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
{
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N)
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N)
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME)
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME)
|| (m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
{
@@ -11082,10 +11090,10 @@ static err_t bli_dtrsm_small_XAlB(
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
{
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME)
||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
{
@@ -12331,10 +12339,10 @@ static err_t bli_dtrsm_small_XAlB_unitDiag(
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
{
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME)
||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
{
@@ -13325,10 +13333,10 @@ static err_t bli_dtrsm_small_XAutB(
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
{
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME)
||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
{
@@ -14594,10 +14602,10 @@ static err_t bli_dtrsm_small_XAutB_unitDiag(
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
{
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME)
||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N)
)
return BLIS_NOT_YET_IMPLEMENTED;
}
#else
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
{