Fixed bug related to block size override for TRSM

- On zen4, TRSM and GEMM use different block sizes.
  When TRSM is called, the block sizes are changed in
  the global cntx object. If GEMM and TRSM are called
  in parallel, the block sizes in the global cntx will
  not be correct for GEMM, which will cause a seg fault.
- To fix this, a local copy of cntx is created
  and the block sizes are changed only in the local copy.

AMD-Internal:[CPUPL-2896]
Change-Id: I0e724520a92fc3b2ed0becf385ec41ab5d1b4490
This commit is contained in:
Shubham
2023-01-05 04:49:57 +05:30
committed by Shubham Sharma
parent 82c2eb4e8e
commit 9e8595356f

View File

@@ -145,12 +145,21 @@ void bli_trsm_front
rntm
);
// TRSM and GEMM may use different blocksizes. If the blocksizes
// are changed in the global cntx object while GEMM and TRSM are
// called in parallel, the blocksizes in the global cntx object
// will not be correct for GEMM.
// To fix this, create a local copy of cntx so that overriding the
// blocksizes does not impact the global cntx object.
cntx_t cntx_trsm = *cntx;
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
if ( bli_cntx_method( &cntx_trsm ) == BLIS_NAT )
{
#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_ZEN4)
/* Zen4 TRSM Fixme:
@@ -167,7 +176,7 @@ void bli_trsm_front
if ( (bli_arch_query_id() == BLIS_ARCH_ZEN4) &&
(bli_obj_dt(a) == BLIS_FLOAT) )
{
bli_zen4_override_trsm_blkszs(cntx);
bli_zen4_override_trsm_blkszs(&cntx_trsm);
}
#endif
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
@@ -175,8 +184,8 @@ void bli_trsm_front
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
pack_t schema_a = bli_cntx_schema_a_block( &cntx_trsm );
pack_t schema_b = bli_cntx_schema_b_panel( &cntx_trsm );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
@@ -192,24 +201,11 @@ void bli_trsm_front
&b_local,
alpha,
&c_local,
cntx,
&cntx_trsm,
rntm,
cntl
);
#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_ZEN4)
/* Zen4 TRSM Fixme:
*
* We have overridden the block sizes at the start of this function.
* Since the context is created only once, we need to ensure that the
* default block sizes are restored for the subsequent operations.
*/
if ( (bli_arch_query_id() == BLIS_ARCH_ZEN4) &&
(bli_obj_dt(a) == BLIS_FLOAT) )
{
bli_zen4_restore_default_blkszs(cntx);
}
#endif
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3);
}