mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Fixed bug related to block size override for TRSM
- On zen4, TRSM and GEMM use different blocksizes, so when TRSM is called the blocksizes are changed in the global cntx object. If GEMM and TRSM are called in parallel, the blocksizes in the global cntx will not be correct for GEMM, which will cause a seg fault. - To fix this, a local copy of cntx is created and the blocksizes are changed only in that local copy. AMD-Internal:[CPUPL-2896] Change-Id: I0e724520a92fc3b2ed0becf385ec41ab5d1b4490
This commit is contained in:
@@ -145,12 +145,21 @@ void bli_trsm_front
|
||||
rntm
|
||||
);
|
||||
|
||||
// If TRSM and GEMM have different blocksizes and blocksizes
|
||||
// are changed in global cntx object, when GEMM and TRSM are
|
||||
// called in parallel, blocksizes in global cntx object will
|
||||
// not be correct for GEMM
|
||||
// to fix this
|
||||
// create a local copy of cntx so that overriding the blocksizes does
|
||||
// not impact the global cntx object.
|
||||
cntx_t cntx_trsm = *cntx;
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
if ( bli_cntx_method( &cntx_trsm ) == BLIS_NAT )
|
||||
{
|
||||
#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_ZEN4)
|
||||
/* Zen4 TRSM Fixme:
|
||||
@@ -167,7 +176,7 @@ void bli_trsm_front
|
||||
if ( (bli_arch_query_id() == BLIS_ARCH_ZEN4) &&
|
||||
(bli_obj_dt(a) == BLIS_FLOAT) )
|
||||
{
|
||||
bli_zen4_override_trsm_blkszs(cntx);
|
||||
bli_zen4_override_trsm_blkszs(&cntx_trsm);
|
||||
}
|
||||
#endif
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
@@ -175,8 +184,8 @@ void bli_trsm_front
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
pack_t schema_a = bli_cntx_schema_a_block( &cntx_trsm );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( &cntx_trsm );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &b_local );
|
||||
@@ -192,24 +201,11 @@ void bli_trsm_front
|
||||
&b_local,
|
||||
alpha,
|
||||
&c_local,
|
||||
cntx,
|
||||
&cntx_trsm,
|
||||
rntm,
|
||||
cntl
|
||||
);
|
||||
|
||||
#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_ZEN4)
|
||||
/* Zen4 TRSM Fixme:
|
||||
*
|
||||
* We have overridden the block sizes at the start of this function.
|
||||
* Since the context is created only once we need to ensure that the
|
||||
* default block sizes are restored for the subsequent operations.
|
||||
*/
|
||||
if ( (bli_arch_query_id() == BLIS_ARCH_ZEN4) &&
|
||||
(bli_obj_dt(a) == BLIS_FLOAT) )
|
||||
{
|
||||
bli_zen4_restore_default_blkszs(cntx);
|
||||
}
|
||||
#endif
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user