From 9843bd0317f01012947b0e856eef6a5bfe0680ce Mon Sep 17 00:00:00 2001 From: Vignesh Balasubramanian Date: Sat, 3 Aug 2024 17:48:04 +0530 Subject: [PATCH] Tuning the decision logic to choose SUP vs Native for ZGEMM - Added additional decision logic to choose between SUP and Native paths for zen4 and zen5 micro-architectures, based on the input dimensions. This logic has been added to the architecture-specific threshold functions that are registered in the context. - The decision logic will override the discrete thresholds present in the zen4 and zen5 contexts. AMD-Internal: [CPUPL-5547] Change-Id: I475f19b110064b3b9eef2e03bbdc21f4dd826c03 --- kernels/zen4/aocl_smart/bli_aocl_smart.c | 29 +++++++++++++++++++++++- kernels/zen5/aocl_smart/bli_aocl_smart.c | 27 ++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/kernels/zen4/aocl_smart/bli_aocl_smart.c b/kernels/zen4/aocl_smart/bli_aocl_smart.c index 96e45b713..ae92591ed 100644 --- a/kernels/zen4/aocl_smart/bli_aocl_smart.c +++ b/kernels/zen4/aocl_smart/bli_aocl_smart.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2023-2024, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -66,6 +66,33 @@ bool bli_cntx_gemmsup_thresh_is_met_zen4( obj_t* a, obj_t* b, obj_t* c, cntx_t* if((m < 5000) && (n < 5000) && (k < 5000)) return TRUE; return FALSE; } + else if( dt == BLIS_DCOMPLEX ) + { + dim_t k = bli_obj_width_after_trans( a ); + dim_t m, n; + + const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b ); + + if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) ) + { + m = bli_obj_width(c); + n = bli_obj_length(c); + } + else + { + m = bli_obj_length( c ); + n = bli_obj_width( c ); + } + // For skinny sizes where m and/or n is small + // The threshold for m is a single value, but for n, it is + // also based on the packing size of A, since the kernels are + // column preferential + if( ( m <= 84 ) || ( ( n <= 84 ) && ( m < 4000 ) ) ) return TRUE; + + // For all combinations in small sizes + if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE; + return FALSE; + } else return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx ); } diff --git a/kernels/zen5/aocl_smart/bli_aocl_smart.c b/kernels/zen5/aocl_smart/bli_aocl_smart.c index 4b6c6621e..b5166ce75 100644 --- a/kernels/zen5/aocl_smart/bli_aocl_smart.c +++ b/kernels/zen5/aocl_smart/bli_aocl_smart.c @@ -66,6 +66,33 @@ bool bli_cntx_gemmsup_thresh_is_met_zen5( obj_t* a, obj_t* b, obj_t* c, cntx_t* if((m < 2200) && (n < 2200) && (k < 2200)) return TRUE; return FALSE; } + else if( dt == BLIS_DCOMPLEX ) + { + dim_t k = bli_obj_width_after_trans( a ); + dim_t m, n; + + const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b ); + + if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) ) + { + m = bli_obj_width(c); + n = bli_obj_length(c); + } + else + { + m = bli_obj_length( c ); + n = bli_obj_width( c ); + } + // For skinny sizes where m and/or n is small + // The threshold for m is a single value, but for n, it is + // also 
based on the packing size of A, since the kernels are + // column preferential + if( ( m <= 84 ) || ( ( n <= 84 ) && ( ( m * k ) <= 983040 ) ) ) return TRUE; + + // For all combinations in small sizes + if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE; + return FALSE; + } else return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx ); }