Tuning the decision logic to choose SUP vs Native for ZGEMM

- Added additional decision logic to choose between the SUP and
  Native paths for the zen4 and zen5 micro-architectures, based on
  the input dimensions. This logic has been added to the
  architecture-specific threshold functions that are registered
  in the context.

- The decision logic overrides the discrete thresholds present
  in the zen4 and zen5 contexts.

AMD-Internal: [CPUPL-5547]
Change-Id: I475f19b110064b3b9eef2e03bbdc21f4dd826c03
This commit is contained in:
Vignesh Balasubramanian
2024-08-03 17:48:04 +05:30
parent bdb94fb218
commit 9843bd0317
2 changed files with 55 additions and 1 deletions

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -66,6 +66,33 @@ bool bli_cntx_gemmsup_thresh_is_met_zen4( obj_t* a, obj_t* b, obj_t* c, cntx_t*
if((m < 5000) && (n < 5000) && (k < 5000)) return TRUE;
return FALSE;
}
else if( dt == BLIS_DCOMPLEX )
{
dim_t k = bli_obj_width_after_trans( a );
dim_t m, n;
const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b );
if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) )
{
m = bli_obj_width(c);
n = bli_obj_length(c);
}
else
{
m = bli_obj_length( c );
n = bli_obj_width( c );
}
// For skinny sizes where m and/or n is small
// The threshold for m is a single value, but for n, it is
// also based on the packing size of A, since the kernels are
// column preferential
if( ( m <= 84 ) || ( ( n <= 84 ) && ( m < 4000 ) ) ) return TRUE;
// For all combinations in small sizes
if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE;
return FALSE;
}
else
return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx );
}

View File

@@ -66,6 +66,33 @@ bool bli_cntx_gemmsup_thresh_is_met_zen5( obj_t* a, obj_t* b, obj_t* c, cntx_t*
if((m < 2200) && (n < 2200) && (k < 2200)) return TRUE;
return FALSE;
}
else if( dt == BLIS_DCOMPLEX )
{
dim_t k = bli_obj_width_after_trans( a );
dim_t m, n;
const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b );
if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) )
{
m = bli_obj_width(c);
n = bli_obj_length(c);
}
else
{
m = bli_obj_length( c );
n = bli_obj_width( c );
}
// For skinny sizes where m and/or n is small
// The threshold for m is a single value, but for n, it is
// also based on the packing size of A, since the kernels are
// column preferential
if( ( m <= 84 ) || ( ( n <= 84 ) && ( ( m * k ) <= 983040 ) ) ) return TRUE;
// For all combinations in small sizes
if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE;
return FALSE;
}
else
return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx );
}