Merge branch 'compose'

This commit is contained in:
Field G. Van Zee
2016-10-05 13:35:01 -05:00
31 changed files with 887 additions and 364 deletions

View File

@@ -341,6 +341,37 @@ pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx )
}
#endif
// Return the product of the five per-loop entries (jc, pc, ic, jr, ir)
// stored in the context's thrloop array.
dim_t bli_cntx_get_num_threads( cntx_t* cntx )
{
	// Accumulate in the order jc, pc, ic, jr, ir.
	dim_t n_threads = bli_cntx_jc_way( cntx );

	n_threads *= bli_cntx_pc_way( cntx );
	n_threads *= bli_cntx_ic_way( cntx );
	n_threads *= bli_cntx_jr_way( cntx );
	n_threads *= bli_cntx_ir_way( cntx );

	return n_threads;
}
// Walk the control tree from cntl down through its chain of sub-nodes and
// return the product of the thrloop entries selected by each node's
// blocksize id. Nodes whose id is BLIS_NO_PART contribute a factor of 1.
// A NULL cntl yields 1.
dim_t bli_cntx_get_num_threads_in( cntx_t* cntx, cntl_t* cntl )
{
	dim_t   product = 1;
	cntl_t* node    = cntl;

	while ( node != NULL )
	{
		bszid_t id = bli_cntl_bszid( node );

		// Any id other than BLIS_NO_PART is used directly as an index into
		// the thrloop array; it is assumed to be one of
		// {KR,MR,NR,MC,KC,NC}.
		if ( id != BLIS_NO_PART )
		{
			product *= bli_cntx_way_for_bszid( id, cntx );
		}

		node = bli_cntl_sub_node( node );
	}

	return product;
}
// -----------------------------------------------------------------------------
#if 1
@@ -663,6 +694,96 @@ void bli_cntx_set_pack_schema_c( pack_t schema_c,
bli_cntx_set_schema_c( schema_c, cntx );
}
// Obtain the requested jc/ic/jr/ir values via bli_env_read_nway() (or use 1
// for every loop when BLIS_ENABLE_MULTITHREADING is not defined), remap
// them for trmm and trsm, and store the result in the context's thrloop
// array via bli_cntx_set_thrloop().
//
// l3_op: operation id; only BLIS_TRMM and BLIS_TRSM are special-cased.
// side:  tested with bli_is_right(), and only for trmm and trsm.
// cntx:  context whose thrloop entries are overwritten.
void bli_cntx_set_thrloop_from_env( opid_t l3_op, side_t side, cntx_t* cntx )
{
#ifdef BLIS_ENABLE_MULTITHREADING
	// The pc value is fixed at 1; BLIS_KC_NT is not consulted.
	dim_t jc = bli_env_read_nway( "BLIS_JC_NT" );
	dim_t pc = 1;
	dim_t ic = bli_env_read_nway( "BLIS_IC_NT" );
	dim_t jr = bli_env_read_nway( "BLIS_JR_NT" );
	dim_t ir = bli_env_read_nway( "BLIS_IR_NT" );
#else
	dim_t jc = 1;
	dim_t pc = 1;
	dim_t ic = 1;
	dim_t jr = 1;
	dim_t ir = 1;
#endif

	// Default mapping: every loop receives exactly the value requested for
	// it. This covers all operations other than trmm/trsm, as well as
	// left-side trmm.
	dim_t jc_way = jc;
	dim_t pc_way = pc;
	dim_t ic_way = ic;
	dim_t jr_way = jr;
	dim_t ir_way = ir;

	if ( l3_op == BLIS_TRMM )
	{
		// We reconfigure the parallelism for trmm_r due to a dependency in
		// the jc loop: the jc request is folded into the jr loop instead.
		// (NOTE: This dependency does not exist for trmm3.)
		if ( bli_is_right( side ) )
		{
			jc_way = 1;
			jr_way = jr * jc;
		}
	}
	else if ( l3_op == BLIS_TRSM )
	{
		// For trsm a single loop receives a combined value and all other
		// loops are set to 1.
		// NOTE(review): the right-side product omits ir and the left-side
		// product omits jc; confirm that this is intentional.
		jc_way = 1;
		pc_way = 1;
		ir_way = 1;

		if ( bli_is_right( side ) )
		{
			ic_way = jc * ic * jr;
			jr_way = 1;
		}
		else // if ( bli_is_left( side ) )
		{
			ic_way = 1;
			jr_way = ic * jr * ir;
		}
	}

	bli_cntx_set_thrloop( jc_way, pc_way, ic_way, jr_way, ir_way, cntx );
}
// -----------------------------------------------------------------------------
bool_t bli_cntx_l3_nat_ukr_prefers_rows_dt( num_t dt,

View File

@@ -59,6 +59,8 @@ typedef struct cntx_s
pack_t schema_b;
pack_t schema_c;
dim_t* thrloop;
membrk_t* membrk;
} cntx_t;
*/
@@ -127,6 +129,36 @@ typedef struct cntx_s
\
( (cntx)->membrk )
// Expands to the thrloop member of the context pointed to by cntx. The
// other thrloop macros in this file index this member with blocksize ids.
#define bli_cntx_thrloop( cntx ) \
\
( (cntx)->thrloop )
// Per-loop accessors into the context's thrloop array. Each macro expands
// to the array element selected by the blocksize id paired with that loop:
//   jc -> BLIS_NC, pc -> BLIS_KC, ic -> BLIS_MC, jr -> BLIS_NR, ir -> BLIS_MR.
// The group is unconditionally enabled (#if 1).
#if 1
#define bli_cntx_jc_way( cntx ) \
\
( (cntx)->thrloop[ BLIS_NC ] )
#define bli_cntx_pc_way( cntx ) \
\
( (cntx)->thrloop[ BLIS_KC ] )
#define bli_cntx_ic_way( cntx ) \
\
( (cntx)->thrloop[ BLIS_MC ] )
#define bli_cntx_jr_way( cntx ) \
\
( (cntx)->thrloop[ BLIS_NR ] )
#define bli_cntx_ir_way( cntx ) \
\
( (cntx)->thrloop[ BLIS_MR ] )
#endif
// Expands to the thrloop element indexed directly by bszid. No range check
// is performed, so the caller must pass an id that is a valid index into
// the thrloop array.
#define bli_cntx_way_for_bszid( bszid, cntx ) \
\
( (cntx)->thrloop[ bszid ] )
// cntx_t modification (fields only)
#define bli_cntx_set_blkszs_buf( _blkszs, cntx_p ) \
@@ -199,6 +231,16 @@ typedef struct cntx_s
(cntx_p)->membrk = _membrk; \
}
// Store the five per-loop values into the context's thrloop array at the
// indices BLIS_NC (jc_), BLIS_KC (pc_), BLIS_MC (ic_), BLIS_NR (jr_), and
// BLIS_MR (ir_). The BLIS_KR entry is always set to 1. Each argument
// appears exactly once in the expansion.
//
// NOTE: the expansion is a bare brace-enclosed block, not a
// do { ... } while (0) statement. Invoking it with a trailing semicolon as
// the unbraced body of an `if` that is followed by `else` will not compile;
// wrap such invocations in braces.
#define bli_cntx_set_thrloop( jc_, pc_, ic_, jr_, ir_, cntx_p ) \
{ \
(cntx_p)->thrloop[ BLIS_NC ] = jc_; \
(cntx_p)->thrloop[ BLIS_KC ] = pc_; \
(cntx_p)->thrloop[ BLIS_MC ] = ic_; \
(cntx_p)->thrloop[ BLIS_NR ] = jr_; \
(cntx_p)->thrloop[ BLIS_MR ] = ir_; \
(cntx_p)->thrloop[ BLIS_KR ] = 1; \
}
// cntx_t query (complex)
#define bli_cntx_get_blksz_def_dt( dt, bs_id, cntx ) \
@@ -356,6 +398,8 @@ func_t* bli_cntx_get_packm_ukr( cntx_t* cntx );
//pack_t bli_cntx_get_pack_schema_a( cntx_t* cntx );
//pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx );
//pack_t bli_cntx_get_pack_schema_c( cntx_t* cntx );
// Returns the product of the context's jc, pc, ic, jr, and ir thrloop
// entries.
dim_t bli_cntx_get_num_threads( cntx_t* cntx );
// Returns the product of the thrloop entries selected by the blocksize ids
// of cntl and each of its successive sub-nodes; nodes with BLIS_NO_PART
// contribute a factor of 1.
dim_t bli_cntx_get_num_threads_in( cntx_t* cntx, cntl_t* cntl );
// set functions
@@ -390,6 +434,9 @@ void bli_cntx_set_pack_schema_b( pack_t schema_b,
cntx_t* cntx );
void bli_cntx_set_pack_schema_c( pack_t schema_c,
cntx_t* cntx );
// Fills the context's thrloop entries from values obtained via
// bli_env_read_nway() (all 1 when multithreading is disabled), remapping
// them for BLIS_TRMM and BLIS_TRSM according to side.
void bli_cntx_set_thrloop_from_env( opid_t l3_op,
side_t side,
cntx_t* cntx );
// other query functions