mirror of
https://github.com/amd/blis.git
synced 2026-05-25 02:44:31 +00:00
AOCL Dynamic Optimization for DGEMMT
- Fine-tuned the thread allocation logic for parallelizing DGEMMT for cases where n <= 220. This results in a performance improvement in multi-threaded DGEMMT for small values of n. AMD-Internal: [CPUPL-2215] Change-Id: I2654bc64d2dc43c2db911e0c9175755be3aa8ba5
This commit is contained in:
committed by
Arnav Sharma
parent
2ad25a7180
commit
4f96bb712e
@@ -680,16 +680,35 @@ void bli_nthreads_optimum(
|
||||
dim_t n = bli_obj_length(c);
|
||||
dim_t k = bli_obj_width_after_trans(a);
|
||||
|
||||
if ( n < 32 )
|
||||
if ( n < 8 )
|
||||
{
|
||||
if ( k <= 512)
|
||||
{
|
||||
n_threads_ideal = 1;
|
||||
}
|
||||
else if ( k <= 1024 )
|
||||
{
|
||||
n_threads_ideal = 4;
|
||||
}
|
||||
}
|
||||
else if ( n < 32 )
|
||||
{
|
||||
if ( k < 128 )
|
||||
{
|
||||
n_threads_ideal = 1;
|
||||
}
|
||||
else if ( k == 128 )
|
||||
else if ( k <= 512 )
|
||||
{
|
||||
n_threads_ideal = 4;
|
||||
}
|
||||
else if ( k <= 1024 )
|
||||
{
|
||||
n_threads_ideal = 6;
|
||||
}
|
||||
else if ( k <= 1600 )
|
||||
{
|
||||
n_threads_ideal = 10;
|
||||
}
|
||||
}
|
||||
else if ( n <= 40 )
|
||||
{
|
||||
@@ -724,6 +743,17 @@ void bli_nthreads_optimum(
|
||||
n_threads_ideal = 8;
|
||||
}
|
||||
}
|
||||
else if ( n < 176 )
|
||||
{
|
||||
if ( k < 128 )
|
||||
{
|
||||
n_threads_ideal = 8;
|
||||
}
|
||||
else if ( k <= 512 )
|
||||
{
|
||||
n_threads_ideal = 14;
|
||||
}
|
||||
}
|
||||
else if ( n <= 220 )
|
||||
{
|
||||
if ( k < 128 )
|
||||
|
||||
Reference in New Issue
Block a user