mirror of
https://github.com/amd/blis.git
synced 2026-05-11 01:30:00 +00:00
Add automatic loop thread assignment.
- Number of threads is determined by BLIS_NUM_THREADS or OMP_NUM_THREADS, but can be overridden by BLIS_XX_NT as before.
- Threads are assigned to loops (ic, jc, ir, and jr) automatically by weighted partitioning and heuristics, both of which are tunable via bli_kernel.h.
- All level-3 BLAS covered.
This commit is contained in:
@@ -1347,6 +1347,26 @@
|
||||
#endif
|
||||
|
||||
|
||||
// -- Define default threading parameters --------------------------------------
|
||||
|
||||
|
||||
// Fallback value for BLIS_DEFAULT_M_THREAD_RATIO. It is applied only when no
// earlier definition exists, so any definition seen before this point wins.
// NOTE(review): presumably the weight given to the m dimension when threads
// are partitioned across loops -- confirm against the thread-assignment code.
#ifndef BLIS_DEFAULT_M_THREAD_RATIO
#define BLIS_DEFAULT_M_THREAD_RATIO 2
#endif
|
||||
|
||||
// Fallback value for BLIS_DEFAULT_N_THREAD_RATIO. It is applied only when no
// earlier definition exists, so any definition seen before this point wins.
// NOTE(review): presumably the weight given to the n dimension when threads
// are partitioned across loops -- confirm against the thread-assignment code.
#ifndef BLIS_DEFAULT_N_THREAD_RATIO
#define BLIS_DEFAULT_N_THREAD_RATIO 1
#endif
|
||||
|
||||
// Fallback value for BLIS_DEFAULT_MR_THREAD_MAX. It is applied only when no
// earlier definition exists, so any definition seen before this point wins.
// NOTE(review): presumably an upper bound on the number of threads assigned
// to the MR (ir) loop -- confirm against the thread-assignment code.
#ifndef BLIS_DEFAULT_MR_THREAD_MAX
#define BLIS_DEFAULT_MR_THREAD_MAX 1
#endif
|
||||
|
||||
// Fallback value for BLIS_DEFAULT_NR_THREAD_MAX. It is applied only when no
// earlier definition exists, so any definition seen before this point wins.
// NOTE(review): presumably an upper bound on the number of threads assigned
// to the NR (jr) loop -- confirm against the thread-assignment code.
#ifndef BLIS_DEFAULT_NR_THREAD_MAX
#define BLIS_DEFAULT_NR_THREAD_MAX 3
#endif
|
||||
|
||||
|
||||
// -- Kernel blocksize checks --------------------------------------------------
|
||||
|
||||
// Verify that cache blocksizes are whole multiples of register blocksizes.
|
||||
|
||||
Reference in New Issue
Block a user