From 4f96bb712e8f9f14e097a353f53c860e59cc60bc Mon Sep 17 00:00:00 2001 From: Arnav Sharma Date: Wed, 13 Jul 2022 11:42:35 +0530 Subject: [PATCH] AOCL Dynamic Optimization for DGEMMT - Fine-tuned the thread allocation logic for parallelizing DGEMMT for the cases where n <= 220. This results in performance improvement in multi-threaded DGEMMT for small values of n. AMD-Internal: [CPUPL-2215] Change-Id: I2654bc64d2dc43c2db911e0c9175755be3aa8ba5 --- frame/base/bli_rntm.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/frame/base/bli_rntm.c b/frame/base/bli_rntm.c index 0db51870e..5efba4f2f 100644 --- a/frame/base/bli_rntm.c +++ b/frame/base/bli_rntm.c @@ -680,16 +680,35 @@ void bli_nthreads_optimum( dim_t n = bli_obj_length(c); dim_t k = bli_obj_width_after_trans(a); - if ( n < 32 ) + if ( n < 8 ) + { + if ( k <= 512) + { + n_threads_ideal = 1; + } + else if ( k <= 1024 ) + { + n_threads_ideal = 4; + } + } + else if ( n < 32 ) { if ( k < 128 ) { n_threads_ideal = 1; } - else if ( k == 128 ) + else if ( k <= 512 ) { n_threads_ideal = 4; } + else if ( k <= 1024 ) + { + n_threads_ideal = 6; + } + else if ( k <= 1600 ) + { + n_threads_ideal = 10; + } } else if ( n <= 40 ) { @@ -724,6 +743,17 @@ void bli_nthreads_optimum( n_threads_ideal = 8; } } + else if ( n < 176 ) + { + if ( k < 128 ) + { + n_threads_ideal = 8; + } + else if ( k <= 512 ) + { + n_threads_ideal = 14; + } + } else if ( n <= 220 ) { if ( k < 128 )