Some fixes to gemm thread info tree creation.

Changed microkernel tests to use the new BLIS_PACKM_SINGLE_THREADED
instead of BLIS_SINGLE_THREADED.
This commit is contained in:
Tyler Smith
2014-03-11 14:16:08 -05:00
parent 020f80c302
commit 92233cf642
7 changed files with 18 additions and 16 deletions

View File

@@ -265,7 +265,7 @@ void PASTEMAC(ch,varname )( \
p_begin = p_cast; \
dim_t t_id = thread_id( thread ); \
dim_t num_threads = thread_num_threads( thread ); \
\
p_inc = ps_p; \
\
for ( ic = ic0 + t_id * ic_inc, ip = ip0 + t_id * ip_inc, it = t_id; it < num_iter; \
ic += num_threads * ic_inc, ip += num_threads * ip_inc, it += num_threads ) \

View File

@@ -55,7 +55,7 @@ gemm_t* gemm_cntl_vl_mm;
gemm_t* gemm_cntl;
dim_t gemm_caucuses_at_level[5] = {2, 1, 1, 1, 1};
dim_t gemm_caucuses_at_level[5] = {1, 1, 2, 1, 1};
gemm_thrinfo_t* bli_gemm_cntl_get_thrinfos()
{

View File

@@ -86,7 +86,6 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_
gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_levels )
{
assert(n_levels == 5);
dim_t jc_way = threads_at_level[0];
@@ -96,31 +95,33 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_
dim_t ir_way = threads_at_level[4];
dim_t global_num_threads = jc_way * kc_way * ic_way * jr_way * ir_way;
assert( global_num_threads != 0 );
dim_t jc_nt = kc_way * ic_way * jr_way * ir_way;
dim_t kc_nt = ic_way * jr_way * ir_way;
dim_t ic_nt = jr_way * ir_way;
dim_t jr_nt = ir_way;
dim_t ir_nt = 1;
gemm_thrinfo_t* paths = (gemm_thrinfo_t*) malloc( global_num_threads * sizeof( gemm_thrinfo_t ) );
thread_comm_t* global_comm = bli_create_communicator( global_num_threads );
for( int a = 0; a < jc_nt; a++ )
for( int a = 0; a < jc_way; a++ )
{
thread_comm_t* jc_comm = bli_create_communicator( jc_nt );
for( int b = 0; b < kc_nt; b++ )
for( int b = 0; b < kc_way; b++ )
{
thread_comm_t* kc_comm = bli_create_communicator( kc_nt );
for( int c = 0; c < ic_nt; c++ )
for( int c = 0; c < ic_way; c++ )
{
thread_comm_t* ic_comm = bli_create_communicator( ic_nt );
for( int d = 0; d < jr_nt; d++ )
for( int d = 0; d < jr_way; d++ )
{
thread_comm_t* jr_comm = bli_create_communicator( jr_nt );
for( int e = 0; e < jc_nt; e++)
for( int e = 0; e < ir_way; e++)
{
thread_comm_t* ir_comm = bli_create_communicator( ir_nt );
dim_t ir_comm_id = 0;
dim_t jr_comm_id = e*ir_nt + ir_comm_id;
dim_t ic_comm_id = d*jr_nt + jr_comm_id;

View File

@@ -214,6 +214,7 @@ thrinfo_t* bli_create_thread_info( dim_t* caucuses_at_level, dim_t n_levels )
void bli_get_range( void* thr, dim_t size, dim_t block_factor, dim_t* start, dim_t* end )
{
thrinfo_t* thread = (thrinfo_t*) thr;
dim_t n_way = thread->n_way;
dim_t work_id = thread->work_id;
dim_t n_pt = size / n_way;

View File

@@ -221,8 +221,8 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params,
&b, &bp );
// Pack the contents of a and b to ap and bp, respectively.
bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED );
bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED );
bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED );
bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED );
// Repeat the experiment n_repeats times and record results.

View File

@@ -251,10 +251,10 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params,
&b, &bp );
// Pack the contents of a to ap.
bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED );
bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED );
// Pack the contents of b to bp.
bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED );
bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED );
// Create subpartitions from the a and b panels.
@@ -268,7 +268,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params,
bli_copym( &c11_save, &c11 );
// Re-pack the contents of b to bp.
bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED );
bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED );
time = bli_clock();

View File

@@ -217,14 +217,14 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params,
&b, &bp );
// Pack the contents of a to ap.
bli_packm_blk_var1( &a, &ap, &BLIS_SINGLE_THREADED );
bli_packm_blk_var1( &a, &ap, &BLIS_PACKM_SINGLE_THREADED );
// Repeat the experiment n_repeats times and record results.
for ( i = 0; i < n_repeats; ++i )
{
// Re-pack the contents of b to bp.
bli_packm_blk_var1( &b, &bp, &BLIS_SINGLE_THREADED );
bli_packm_blk_var1( &b, &bp, &BLIS_PACKM_SINGLE_THREADED );
bli_copym( &c_save, &c );