mirror of
https://github.com/amd/blis.git
synced 2026-05-12 10:05:38 +00:00
Change in threshold condition for trsm_small kernels
Change-Id: I396e246b1639d300fcb94bdf7e5fa8bc8c87e994
This commit is contained in:
@@ -60,11 +60,27 @@
|
||||
// Build-time switch. When defined, the visible dtrsm small-kernel size checks
// compare against the *_ROME thresholds; their #else branches compare against
// *_NAPLES thresholds instead.
#define BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
// General small-matrix threshold for ROME. It is not referenced by any of the
// visible dtrsm checks, so its use is not described here.
#define BLIS_SMALL_MATRIX_THRES_ROME 400
|
||||
|
||||
// Outer size gate, compared against bli_max(m,n), used by the XAltB, XAlB and
// XAutB small kernels and their unit-diagonal variants.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME 120
|
||||
// NOTE(review): D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME is defined twice in
// this listing (60 here, 80 two definitions later). This looks like the old
// and new lines of a diff whose +/- markers were lost -- confirm which value
// is current; a real header must keep only one.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 60
|
||||
// NOTE(review): this XAUB_ROME definition is repeated further down with the
// same value (150) -- presumably the removed copy of a moved line; confirm.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150
|
||||
// AlXB kernels: the refined checks only run when bli_max(m,n) exceeds this
// value. It is also the n bound paired with ROW_PANEL_M and the m bound
// paired with COLUMN_PANEL_N in those checks.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 80
|
||||
// AlXB: reject the small kernel (BLIS_NOT_YET_IMPLEMENTED) when
// m > ROW_PANEL_M and n > ALXB_ROME.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M 40
|
||||
// AlXB: reject the small kernel when m > COLUMN_PANEL_M and
// n < COLUMN_PANEL_N.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M 1000
|
||||
// AlXB: n bound used twice -- reject when m > ALXB_ROME and n > this value,
// and (with COLUMN_PANEL_M) when n < this value.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N 10
|
||||
|
||||
// XAuB kernels: the size checks are only entered when (m/n) is below this
// ratio. The same ratio test appears in both the ROME and the NAPLES branch.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22
|
||||
// XAuB kernels: outer gate compared against bli_max(m,n). It is also the n
// bound paired with ROW_PANEL_M and the m bound paired with COLUMN_PANEL_N.
// NOTE(review): the same macro with the same value is also defined earlier in
// this listing -- presumably the removed line of the diff; confirm only one
// definition survives.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150
|
||||
// XAuB: reject the small kernel (BLIS_NOT_YET_IMPLEMENTED) when
// m > ROW_PANEL_M and n > XAUB_ROME.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M 5
|
||||
// XAuB: reject the small kernel when m > XAUB_ROME and n > COLUMN_PANEL_N.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N 130
|
||||
|
||||
// XAltB kernels. These checks run only when bli_max(m,n) exceeds
// D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME; the small kernel is then rejected
// (BLIS_NOT_YET_IMPLEMENTED) if any of these holds:
//   m < ROW_PANEL_M     && n > ROW_PANEL_N
//   m < SQUARE_M        && n > SQUARE_N
//   m < COLUMN_PANEL_N  && n > XALTB_ROME
//   m < XALTB_ROME      && n > XALTB_ROME
//   m > XALTB_ROME      && n > COLUMN_PANEL_N
// NOTE(review): with the values below (COLUMN_PANEL_N 100 < XALTB_ROME 120)
// the third condition is implied by the fourth -- confirm whether that is
// intended.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME 120
|
||||
// Upper bound on m for the first rejection condition.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M 10
|
||||
// Lower bound on n for the first rejection condition.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N 1200
|
||||
// Upper bound on m for the second rejection condition.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M 30
|
||||
// Lower bound on n for the second rejection condition.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N 280
|
||||
// Despite the _N suffix this is compared against m in the third condition and
// against n in the fifth.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N 100
|
||||
|
||||
// XAlB kernels. These checks run only when bli_max(m,n) exceeds
// D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME; the small kernel is then rejected
// (BLIS_NOT_YET_IMPLEMENTED) when m < XALB_ROME and n > XALB_ROME, or when
// m > XALB_ROME and n > COL_PANEL_N.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME 110
|
||||
// n bound for the second XAlB condition. The XAutB checks also compare n
// against this constant.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N 30
|
||||
|
||||
// XAutB kernels. These checks run only when bli_max(m,n) exceeds
// D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME; the small kernel is then rejected
// (BLIS_NOT_YET_IMPLEMENTED) when m < XAUTB_ROME and n > XAUTB_ROME, or when
// m > XAUTB_ROME and n exceeds a column-panel bound.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME 120
|
||||
// NOTE(review): this constant is not referenced by the visible XAutB checks;
// they compare n against D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N
// (30) instead. That looks like a copy-paste slip from the XAlB checks --
// confirm which bound (30 or 50) was tuned for XAutB.
#define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME_COL_PANEL_N 50
|
||||
|
||||
// When running HPL with pure MPI without DGEMM threading (Single-threaded
|
||||
// BLIS), defining this macro as 1 yields better performance.
|
||||
|
||||
@@ -953,10 +953,11 @@ static err_t bli_dtrsm_small_AlXB(
|
||||
dim_t n = bli_obj_width(b); // number of columns of matrix B
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME)
|
||||
{
|
||||
if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME)
|
||||
|| (m> D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N)
|
||||
|| (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M && n<D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES)
|
||||
{
|
||||
@@ -3008,10 +3009,11 @@ static err_t bli_dtrsm_small_AlXB_unitDiag(
|
||||
dim_t n = bli_obj_width(b); // number of columns of matrix B
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME)
|
||||
{
|
||||
if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME)
|
||||
|| (m> D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N)
|
||||
|| (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M && n<D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES)
|
||||
{
|
||||
@@ -4683,10 +4685,10 @@ static err_t bli_dtrsm_small_XAuB(
|
||||
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO)
|
||||
{
|
||||
if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME)
|
||||
|| (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO)
|
||||
{
|
||||
@@ -6410,10 +6412,10 @@ static err_t bli_dtrsm_small_XAuB_unitDiag(
|
||||
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO)
|
||||
{
|
||||
if((m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME)
|
||||
|| (m>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO)
|
||||
{
|
||||
@@ -7864,10 +7866,13 @@ static err_t bli_dtrsm_small_XAltB(
|
||||
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
|
||||
{
|
||||
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N)
|
||||
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N)
|
||||
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME)
|
||||
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME)
|
||||
|| (m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
|
||||
{
|
||||
@@ -9615,10 +9620,13 @@ static err_t bli_dtrsm_small_XAltB_unitDiag(
|
||||
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
|
||||
{
|
||||
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M && n>D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N)
|
||||
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N)
|
||||
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME)
|
||||
|| (m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME)
|
||||
|| (m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
|
||||
{
|
||||
@@ -11082,10 +11090,10 @@ static err_t bli_dtrsm_small_XAlB(
|
||||
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
|
||||
{
|
||||
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME)
|
||||
||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
|
||||
{
|
||||
@@ -12331,10 +12339,10 @@ static err_t bli_dtrsm_small_XAlB_unitDiag(
|
||||
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
|
||||
{
|
||||
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME)
|
||||
||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
|
||||
{
|
||||
@@ -13325,10 +13333,10 @@ static err_t bli_dtrsm_small_XAutB(
|
||||
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
|
||||
{
|
||||
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME)
|
||||
||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
|
||||
{
|
||||
@@ -14594,10 +14602,10 @@ static err_t bli_dtrsm_small_XAutB_unitDiag(
|
||||
dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX_ROME
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME)
|
||||
{
|
||||
if((m < D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME)
|
||||
||(m > D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME && n > D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N)
|
||||
)
|
||||
return BLIS_NOT_YET_IMPLEMENTED;
|
||||
}
|
||||
#else
|
||||
if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user