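Enable threading for packm blocked variants 3 and 4 (bli_packm_blk_var3, bli_packm_blk_var4): each thread starts at the panel iteration equal to its thread id and advances by the number of threads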

This commit is contained in:
Tyler Smith
2014-03-18 14:35:37 -05:00
parent 0ac534cdf6
commit ec8b88f935
4 changed files with 36 additions and 20 deletions

View File

@@ -52,14 +52,16 @@ typedef void (*FUNCPTR_T)(
void* kappa,
void* c, inc_t rs_c, inc_t cs_c,
void* p, inc_t rs_p, inc_t cs_p,
dim_t pd_p, inc_t ps_p
dim_t pd_p, inc_t ps_p,
packm_thrinfo_t* thread
);
//static FUNCPTR_T GENARRAY(ftypes,packm_blk_var3);
void bli_packm_blk_var3( obj_t* c,
obj_t* p )
obj_t* p,
packm_thrinfo_t* t )
{
num_t dt_cp = bli_obj_datatype( *c );
@@ -154,7 +156,8 @@ void bli_packm_blk_var3( obj_t* c,
buf_kappa,
buf_c, rs_c, cs_c,
buf_p, rs_p, cs_p,
pd_p, ps_p );
pd_p, ps_p,
t );
}
@@ -177,7 +180,8 @@ void PASTEMAC(ch,varname)( \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p \
dim_t pd_p, inc_t ps_p, \
packm_thrinfo_t* thread \
) \
{ \
ctype* restrict kappa_cast = kappa; \
@@ -296,14 +300,18 @@ void PASTEMAC(ch,varname)( \
} \
\
p_begin = p_cast; \
dim_t t_id = thread_id( thread ); \
dim_t num_threads = thread_num_threads( thread ); \
p_inc = ps_p; \
\
for ( ic = ic0, ip = ip0, it = 0; it < num_iter; \
ic += ic_inc, ip += ip_inc, it += 1 ) \
for ( ic = ic0 + t_id * ic_inc, ip = ip0 + t_id * ip_inc, it = t_id; it < num_iter; \
ic += num_threads * ic_inc, ip += num_threads * ip_inc, it += num_threads ) \
{ \
panel_dim_i = bli_min( panel_dim_max, iter_dim - ic ); \
\
diagoffc_i = diagoffc + (ip )*diagoffc_inc; \
c_begin = c_cast + (ic )*vs_c; \
p_begin = p_cast + (ip )*p_inc; \
\
if ( bli_is_triangular( strucc ) && \
bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel_full, *n_panel_full ) ) \
@@ -437,8 +445,6 @@ void PASTEMAC(ch,varname)( \
*/ \
\
} \
\
p_begin += p_inc; \
} \
}

View File

@@ -33,7 +33,8 @@
*/
void bli_packm_blk_var3( obj_t* c,
obj_t* p );
obj_t* p,
packm_thrinfo_t* t );
#undef GENTPROTCO
@@ -55,7 +56,8 @@ void PASTEMAC(ch,varname)( \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p \
dim_t pd_p, inc_t ps_p, \
packm_thrinfo_t* thread \
);
INSERT_GENTPROTCO_BASIC( packm_blk_var3 )

View File

@@ -52,14 +52,16 @@ typedef void (*FUNCPTR_T)(
void* kappa,
void* c, inc_t rs_c, inc_t cs_c,
void* p, inc_t rs_p, inc_t cs_p,
dim_t pd_p, inc_t ps_p
dim_t pd_p, inc_t ps_p,
packm_thrinfo_t* thread
);
//static FUNCPTR_T GENARRAY(ftypes,packm_blk_var4);
void bli_packm_blk_var4( obj_t* c,
obj_t* p )
obj_t* p,
packm_thrinfo_t* t )
{
num_t dt_cp = bli_obj_datatype( *c );
@@ -154,7 +156,8 @@ void bli_packm_blk_var4( obj_t* c,
buf_kappa,
buf_c, rs_c, cs_c,
buf_p, rs_p, cs_p,
pd_p, ps_p );
pd_p, ps_p,
t );
}
@@ -177,7 +180,8 @@ void PASTEMAC(ch,varname)( \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p \
dim_t pd_p, inc_t ps_p, \
packm_thrinfo_t* thread \
) \
{ \
ctype* restrict kappa_cast = kappa; \
@@ -296,14 +300,18 @@ void PASTEMAC(ch,varname)( \
} \
\
p_begin = p_cast; \
dim_t t_id = thread_id( thread ); \
dim_t num_threads = thread_num_threads( thread ); \
p_inc = ps_p; \
\
for ( ic = ic0, ip = ip0, it = 0; it < num_iter; \
ic += ic_inc, ip += ip_inc, it += 1 ) \
for ( ic = ic0 + t_id * ic_inc, ip = ip0 + t_id * ip_inc, it = t_id; it < num_iter; \
ic += num_threads * ic_inc, ip += num_threads * ip_inc, it += num_threads ) \
{ \
panel_dim_i = bli_min( panel_dim_max, iter_dim - ic ); \
\
diagoffc_i = diagoffc + (ip )*diagoffc_inc; \
c_begin = c_cast + (ic )*vs_c; \
p_begin = p_cast + (ip )*p_inc; \
\
if ( bli_is_triangular( strucc ) && \
bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel_full, *n_panel_full ) ) \
@@ -452,8 +460,6 @@ void PASTEMAC(ch,varname)( \
*/ \
\
} \
\
p_begin += p_inc; \
} \
}

View File

@@ -33,7 +33,8 @@
*/
void bli_packm_blk_var4( obj_t* c,
obj_t* p );
obj_t* p,
packm_thrinfo_t* t );
#undef GENTPROTCO
@@ -55,7 +56,8 @@ void PASTEMAC(ch,varname)( \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p \
dim_t pd_p, inc_t ps_p, \
packm_thrinfo_t* t \
);
INSERT_GENTPROTCO_BASIC( packm_blk_var4 )