mirror of
https://github.com/amd/blis.git
synced 2026-05-24 18:34:40 +00:00
Tuning the decision logic to choose SUP vs Native for ZGEMM
- Added additional decision logic to choose between the SUP and Native paths for the zen4 and zen5 micro-architectures, based on the input dimensions. This logic has been added to the architecture-specific threshold functions that are registered in the context. - This decision logic overrides the discrete thresholds present in the zen4 and zen5 contexts. AMD-Internal: [CPUPL-5547] Change-Id: I475f19b110064b3b9eef2e03bbdc21f4dd826c03
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -66,6 +66,33 @@ bool bli_cntx_gemmsup_thresh_is_met_zen4( obj_t* a, obj_t* b, obj_t* c, cntx_t*
|
||||
if((m < 5000) && (n < 5000) && (k < 5000)) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
else if( dt == BLIS_DCOMPLEX )
|
||||
{
|
||||
dim_t k = bli_obj_width_after_trans( a );
|
||||
dim_t m, n;
|
||||
|
||||
const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b );
|
||||
|
||||
if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) )
|
||||
{
|
||||
m = bli_obj_width(c);
|
||||
n = bli_obj_length(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
m = bli_obj_length( c );
|
||||
n = bli_obj_width( c );
|
||||
}
|
||||
// For skinny sizes where m and/or n is small
|
||||
// The threshold for m is a single value, but for n, it is
|
||||
// also based on the packing size of A, since the kernels are
|
||||
// column preferential
|
||||
if( ( m <= 84 ) || ( ( n <= 84 ) && ( m < 4000 ) ) ) return TRUE;
|
||||
|
||||
// For all combinations in small sizes
|
||||
if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx );
|
||||
}
|
||||
|
||||
@@ -66,6 +66,33 @@ bool bli_cntx_gemmsup_thresh_is_met_zen5( obj_t* a, obj_t* b, obj_t* c, cntx_t*
|
||||
if((m < 2200) && (n < 2200) && (k < 2200)) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
else if( dt == BLIS_DCOMPLEX )
|
||||
{
|
||||
dim_t k = bli_obj_width_after_trans( a );
|
||||
dim_t m, n;
|
||||
|
||||
const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b );
|
||||
|
||||
if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) )
|
||||
{
|
||||
m = bli_obj_width(c);
|
||||
n = bli_obj_length(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
m = bli_obj_length( c );
|
||||
n = bli_obj_width( c );
|
||||
}
|
||||
// For skinny sizes where m and/or n is small
|
||||
// The threshold for m is a single value, but for n, it is
|
||||
// also based on the packing size of A, since the kernels are
|
||||
// column preferential
|
||||
if( ( m <= 84 ) || ( ( n <= 84 ) && ( ( m * k ) <= 983040 ) ) ) return TRUE;
|
||||
|
||||
// For all combinations in small sizes
|
||||
if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx );
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user