mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Some fixes to gemm thread info tree creation.
Changed microkernel tests to use the new BLIS_PACKM_SINGLE_THREADED instead of BLIS_SINGLE_THREADED.
This commit is contained in:
@@ -265,7 +265,7 @@ void PASTEMAC(ch,varname )( \
|
||||
p_begin = p_cast; \
|
||||
dim_t t_id = thread_id( thread ); \
|
||||
dim_t num_threads = thread_num_threads( thread ); \
|
||||
\
|
||||
p_inc = ps_p; \
|
||||
\
|
||||
for ( ic = ic0 + t_id * ic_inc, ip = ip0 + t_id * ip_inc, it = t_id; it < num_iter; \
|
||||
ic += num_threads * ic_inc, ip += num_threads * ip_inc, it += num_threads ) \
|
||||
|
||||
@@ -55,7 +55,7 @@ gemm_t* gemm_cntl_vl_mm;
|
||||
|
||||
gemm_t* gemm_cntl;
|
||||
|
||||
dim_t gemm_caucuses_at_level[5] = {2, 1, 1, 1, 1};
|
||||
dim_t gemm_caucuses_at_level[5] = {1, 1, 2, 1, 1};
|
||||
|
||||
gemm_thrinfo_t* bli_gemm_cntl_get_thrinfos()
|
||||
{
|
||||
|
||||
@@ -86,7 +86,6 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_
|
||||
|
||||
gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_levels )
|
||||
{
|
||||
|
||||
assert(n_levels == 5);
|
||||
|
||||
dim_t jc_way = threads_at_level[0];
|
||||
@@ -96,31 +95,33 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_
|
||||
dim_t ir_way = threads_at_level[4];
|
||||
|
||||
dim_t global_num_threads = jc_way * kc_way * ic_way * jr_way * ir_way;
|
||||
assert( global_num_threads != 0 );
|
||||
|
||||
dim_t jc_nt = kc_way * ic_way * jr_way * ir_way;
|
||||
dim_t kc_nt = ic_way * jr_way * ir_way;
|
||||
dim_t ic_nt = jr_way * ir_way;
|
||||
dim_t jr_nt = ir_way;
|
||||
dim_t ir_nt = 1;
|
||||
|
||||
|
||||
gemm_thrinfo_t* paths = (gemm_thrinfo_t*) malloc( global_num_threads * sizeof( gemm_thrinfo_t ) );
|
||||
|
||||
thread_comm_t* global_comm = bli_create_communicator( global_num_threads );
|
||||
for( int a = 0; a < jc_nt; a++ )
|
||||
for( int a = 0; a < jc_way; a++ )
|
||||
{
|
||||
thread_comm_t* jc_comm = bli_create_communicator( jc_nt );
|
||||
for( int b = 0; b < kc_nt; b++ )
|
||||
for( int b = 0; b < kc_way; b++ )
|
||||
{
|
||||
thread_comm_t* kc_comm = bli_create_communicator( kc_nt );
|
||||
for( int c = 0; c < ic_nt; c++ )
|
||||
for( int c = 0; c < ic_way; c++ )
|
||||
{
|
||||
thread_comm_t* ic_comm = bli_create_communicator( ic_nt );
|
||||
for( int d = 0; d < jr_nt; d++ )
|
||||
for( int d = 0; d < jr_way; d++ )
|
||||
{
|
||||
thread_comm_t* jr_comm = bli_create_communicator( jr_nt );
|
||||
for( int e = 0; e < jc_nt; e++)
|
||||
for( int e = 0; e < ir_way; e++)
|
||||
{
|
||||
thread_comm_t* ir_comm = bli_create_communicator( ir_nt );
|
||||
|
||||
dim_t ir_comm_id = 0;
|
||||
dim_t jr_comm_id = e*ir_nt + ir_comm_id;
|
||||
dim_t ic_comm_id = d*jr_nt + jr_comm_id;
|
||||
|
||||
@@ -214,6 +214,7 @@ thrinfo_t* bli_create_thread_info( dim_t* caucuses_at_level, dim_t n_levels )
|
||||
void bli_get_range( void* thr, dim_t size, dim_t block_factor, dim_t* start, dim_t* end )
|
||||
{
|
||||
thrinfo_t* thread = (thrinfo_t*) thr;
|
||||
|
||||
dim_t n_way = thread->n_way;
|
||||
dim_t work_id = thread->work_id;
|
||||
dim_t n_pt = size / n_way;
|
||||
|
||||
@@ -221,8 +221,8 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params,
|
||||
&b, &bp );
|
||||
|
||||
// Pack the contents of a and b to ap and bp, respectively.
|
||||
bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED );
|
||||
|
||||
|
||||
// Repeat the experiment n_repeats times and record results.
|
||||
|
||||
@@ -251,10 +251,10 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params,
|
||||
&b, &bp );
|
||||
|
||||
// Pack the contents of a to ap.
|
||||
bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED );
|
||||
|
||||
// Pack the contents of b to bp.
|
||||
bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED );
|
||||
|
||||
|
||||
// Create subpartitions from the a and b panels.
|
||||
@@ -268,7 +268,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params,
|
||||
bli_copym( &c11_save, &c11 );
|
||||
|
||||
// Re-pack the contents of b to bp.
|
||||
bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED );
|
||||
|
||||
time = bli_clock();
|
||||
|
||||
|
||||
@@ -217,14 +217,14 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params,
|
||||
&b, &bp );
|
||||
|
||||
// Pack the contents of a to ap.
|
||||
bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED );
|
||||
|
||||
|
||||
// Repeat the experiment n_repeats times and record results.
|
||||
for ( i = 0; i < n_repeats; ++i )
|
||||
{
|
||||
// Re-pack the contents of b to bp.
|
||||
bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED );
|
||||
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
Reference in New Issue
Block a user