diff --git a/frame/3/trsm/bli_trsm_front.c b/frame/3/trsm/bli_trsm_front.c index 35cd2d4b8..1c8fa0821 100644 --- a/frame/3/trsm/bli_trsm_front.c +++ b/frame/3/trsm/bli_trsm_front.c @@ -145,12 +145,21 @@ void bli_trsm_front rntm ); + // If TRSM and GEMM have different blocksizes and blocksizes + // are changed in global cntx object, when GEMM and TRSM are + // called in parallel, blocksizes in global cntx object will + // not be correct for GEMM + // to fix this + // create a local copy of cntx so that overriding the blocksizes does + // not impact the global cntx object. + cntx_t cntx_trsm = *cntx; + // A sort of hack for communicating the desired pach schemas for A and B // to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and // bli_l3_cntl_create_if()). This allows us to access the schemas from // the control tree, which hopefully reduces some confusion, particularly // in bli_packm_init(). - if ( bli_cntx_method( cntx ) == BLIS_NAT ) + if ( bli_cntx_method( &cntx_trsm ) == BLIS_NAT ) { #if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_ZEN4) /* Zen4 TRSM Fixme: @@ -167,7 +176,7 @@ void bli_trsm_front if ( (bli_arch_query_id() == BLIS_ARCH_ZEN4) && (bli_obj_dt(a) == BLIS_FLOAT) ) { - bli_zen4_override_trsm_blkszs(cntx); + bli_zen4_override_trsm_blkszs(&cntx_trsm); } #endif bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); @@ -175,8 +184,8 @@ void bli_trsm_front } else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) { - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + pack_t schema_a = bli_cntx_schema_a_block( &cntx_trsm ); + pack_t schema_b = bli_cntx_schema_b_panel( &cntx_trsm ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &b_local ); @@ -192,24 +201,11 @@ void bli_trsm_front &b_local, alpha, &c_local, - cntx, + &cntx_trsm, rntm, cntl ); - -#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_ZEN4) - /* Zen4 TRSM Fixme: - * - * We have overrding the block sizes at the start of this function - * Since the context is created only once we need to ensure that the - * default block sizes are restored for the subsequent operations. - */ - if ( (bli_arch_query_id() == BLIS_ARCH_ZEN4) && - (bli_obj_dt(a) == BLIS_FLOAT) ) - { - bli_zen4_restore_default_blkszs(cntx); - } -#endif + AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3); }