diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 57c1175bf..a9506fd4a 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -112,52 +112,6 @@ siz_t bli_packm_init return 0; } -#if 0 - pack_t schema; - - if ( bli_cntx_method( cntx ) != BLIS_NAT ) - { - // We now ignore the pack_schema field in the control tree and - // extract the schema from the context, depending on whether we are - // preparing to pack a block of A or panel of B. For A and B, we must - // obtain the schema from the context since the induced methods reuse - // the same control trees used by native execution, and those induced - // methods specify the schema used by the current execution phase - // within the context (whereas the control tree does not change). - - if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK ) - { - schema = bli_cntx_schema_a_block( cntx ); - } - else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL ) - { - schema = bli_cntx_schema_b_panel( cntx ); - } - else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) - { - schema = bli_cntl_packm_params_pack_schema( cntl ); - } - } - else // ( bli_cntx_method( cntx ) == BLIS_NAT ) - { - // For native execution, we obtain the schema from the control tree - // node. (Notice that it doesn't matter if the pack_buf_type is for - // A or B.) - schema = bli_cntl_packm_params_pack_schema( cntl ); - } - // This is no longer needed now that we branch between native and - // non-native cases above. -#if 0 - if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) - { - // If we get a request to pack C for some reason, it is likely - // not part of an induced method, and so it would be safe (and - // necessary) to read the pack schema from the control tree. - schema = bli_cntl_packm_params_pack_schema( cntl ); - } -#endif -#endif - // Prepare a few other variables based on properties of the control // tree. 
diff --git a/frame/3/bli_l3.h b/frame/3/bli_l3.h index 740733c3e..be6e802d4 100644 --- a/frame/3/bli_l3.h +++ b/frame/3/bli_l3.h @@ -46,6 +46,7 @@ #include "bli_l3_direct.h" #include "bli_l3_prune.h" #include "bli_l3_packm.h" +#include "bli_l3_schema.h" // Prototype object APIs (expert and non-expert). #include "bli_oapi_ex.h" diff --git a/frame/3/bli_l3_schema.c b/frame/3/bli_l3_schema.c new file mode 100644 index 000000000..bde30c527 --- /dev/null +++ b/frame/3/bli_l3_schema.c @@ -0,0 +1,80 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2021, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_l3_set_schemas + ( + obj_t* a, + obj_t* b, + obj_t* c, + cntx_t* cntx + ) +{ + // Choose pack schemas for A and B: native by default, 1e/1r pairs for 1m. + pack_t schema_a = BLIS_PACKED_ROW_PANELS; + pack_t schema_b = BLIS_PACKED_COL_PANELS; + + // Under the 1m method, the schemas depend on the storage preference of the + // gemm microkernel as recorded in cntx, queried for a datatype built from + // the domain and computation precision of C. + if ( bli_cntx_method( cntx ) == BLIS_1M ) + { + num_t dt = bli_obj_domain( c ) | bli_obj_comp_prec( c ); + + // Note that bli_cntx_l3_vir_ukr_prefers_cols_dt() will use the real + // projection of dt to query the preference of the corresponding native + // real-domain microkernel. This is what ultimately determines which + // variant of 1m is applicable. + if ( bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ) ) + { + schema_a = BLIS_PACKED_ROW_PANELS_1E; + schema_b = BLIS_PACKED_COL_PANELS_1R; + } + else + { + schema_a = BLIS_PACKED_ROW_PANELS_1R; + schema_b = BLIS_PACKED_COL_PANELS_1E; + } + } + + // Embed the schemas into the objects for A and B. This is a sort of hack + // for communicating the desired pack schemas to bli_gemm_cntl_create() + // (via bli_l3_thread_decorator() and bli_l3_cntl_create_if()). This allows + // us to subsequently access the schemas from the control tree, which + // hopefully reduces some confusion, particularly in bli_packm_init(). 
+ bli_obj_set_pack_schema( schema_a, a ); + bli_obj_set_pack_schema( schema_b, b ); +} + diff --git a/frame/3/bli_l3_schema.h b/frame/3/bli_l3_schema.h new file mode 100644 index 000000000..c6a12ce52 --- /dev/null +++ b/frame/3/bli_l3_schema.h @@ -0,0 +1,41 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2021, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_l3_set_schemas + ( + obj_t* a, + obj_t* b, + obj_t* c, + cntx_t* cntx + ); diff --git a/frame/3/gemm/bli_gemm_front.c b/frame/3/gemm/bli_gemm_front.c index 3a46c4ecf..bd815a4c8 100644 --- a/frame/3/gemm/bli_gemm_front.c +++ b/frame/3/gemm/bli_gemm_front.c @@ -91,6 +91,22 @@ void bli_gemm_front bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); + // An optimization: if the micro-kernel dislikes how C is stored (by rows vs. + // by columns), transpose the whole operation so C is accessed as preferred. + // NOTE(review): the "!ccr && !crc" guard that preceded this block at its old + // spot is not moved with it and would now guard the next statement -- confirm. + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + { + bli_obj_swap( &a_local, &b_local ); + + bli_obj_induce_trans( &a_local ); + bli_obj_induce_trans( &b_local ); + bli_obj_induce_trans( &c_local ); + } + + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &b_local, &c_local, cntx ); + #ifdef BLIS_ENABLE_GEMM_MD cntx_t cntx_local; @@ -110,24 +126,8 @@ void bli_gemm_front // is adjusted to point to cntx_local.) bli_gemm_md( &a_local, &b_local, beta, &c_local, &cntx_local, &cntx ); } - //else // homogeneous datatypes #endif - // Load the pack schemas from the context and embed them into the objects - // for A and B. (Native contexts are initialized with the correct pack - // schemas, as are contexts for 1m, and if necessary bli_gemm_md() would - // have made a copy and modified the schemas, so reading them from the - // context should be a safe bet at this point.) This is a sort of hack for - // communicating the desired pack schemas to bli_gemm_cntl_create() (via - // bli_l3_thread_decorator() and bli_l3_cntl_create_if()). This allows us - // to subsequently access the schemas from the control tree, which - // hopefully reduces some confusion, particularly in bli_packm_init(). 
- const pack_t schema_a = bli_cntx_schema_a_block( cntx ); - const pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &b_local ); - // Next, we handle the possibility of needing to typecast alpha to the // computation datatype and/or beta to the storage datatype of C. @@ -153,22 +153,6 @@ void bli_gemm_front if ( !bli_gemm_md_is_ccr( &a_local, &b_local, &c_local ) && !bli_gemm_md_is_crc( &a_local, &b_local, &c_local ) ) #endif - // An optimization: If C is stored by rows and the micro-kernel prefers - // contiguous columns, or if C is stored by columns and the micro-kernel - // prefers contiguous rows, transpose the entire operation to allow the - // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) - { - bli_obj_swap( &a_local, &b_local ); - - bli_obj_induce_trans( &a_local ); - bli_obj_induce_trans( &b_local ); - bli_obj_induce_trans( &c_local ); - - // We must also swap the pack schemas, which were set by bli_gemm_md() - // or the inlined code above. - bli_obj_swap_pack_schemas( &a_local, &b_local ); - } // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any diff --git a/frame/3/gemm/bli_gemm_md.c b/frame/3/gemm/bli_gemm_md.c index 0f82b15f3..e257cdf28 100644 --- a/frame/3/gemm/bli_gemm_md.c +++ b/frame/3/gemm/bli_gemm_md.c @@ -187,6 +187,10 @@ mddm_t bli_gemm_md_ccr bli_obj_induce_trans( b ); bli_obj_induce_trans( c ); + // We must swap the pack schemas because the schemas were set before + // the objects were swapped. 
+ bli_obj_swap_pack_schemas( a, b ); + return bli_gemm_md_crc( a, b, beta, c, cntx_local, cntx ); } @@ -230,7 +234,7 @@ mddm_t bli_gemm_md_ccr bli_blksz_scale_def_max( 1, 2, BLIS_SCOMPLEX, blksz_mc ); bli_blksz_scale_def_max( 1, 2, BLIS_DCOMPLEX, blksz_mc ); - // Use the default pack schemas in the context. + // Use the default pack schemas in the objects. // static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx ) func_t* l3_vir_ukrs = bli_cntx_get_l3_vir_ukrs( BLIS_GEMM_UKR, *cntx ); @@ -288,6 +292,10 @@ mddm_t bli_gemm_md_crc bli_obj_induce_trans( b ); bli_obj_induce_trans( c ); + // We must swap the pack schemas because the schemas were set before + // the objects were swapped. + bli_obj_swap_pack_schemas( a, b ); + return bli_gemm_md_ccr( a, b, beta, c, cntx_local, cntx ); } @@ -331,7 +339,7 @@ mddm_t bli_gemm_md_crc bli_blksz_scale_def_max( 1, 2, BLIS_SCOMPLEX, blksz_nc ); bli_blksz_scale_def_max( 1, 2, BLIS_DCOMPLEX, blksz_nc ); - // Use the default pack schemas in the context. + // Use the default pack schemas in the objects. // static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx ) func_t* l3_vir_ukrs = bli_cntx_get_l3_vir_ukrs( BLIS_GEMM_UKR, *cntx ); @@ -405,8 +413,8 @@ mddm_t bli_gemm_md_rcc // Use the 1r pack schema for both A and B with the conjugation // of A or B toggled (to produce ar * br - ai * bi). - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_1R, *cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_1R, *cntx ); + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS_1R, a ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS_1R, b ); bli_obj_toggle_conj( b ); @@ -485,7 +493,7 @@ mddm_t bli_gemm_md_crr } #endif - // Use the default pack schemas in the context. + // Use the default pack schemas in the objects. // Return the computation and execution domains. return doms; @@ -523,7 +531,7 @@ mddm_t bli_gemm_md_rcr // Overwrite the complex obj_t with its real-only alias. 
*a = a_real; - // Use the default pack schemas in the context. + // Use the default pack schemas in the objects. // Return the computation and execution domains. return doms; @@ -561,7 +569,7 @@ mddm_t bli_gemm_md_rrc // Overwrite the complex obj_t with its real-only alias. *b = b_real; - // Use the default pack schemas in the context. + // Use the default pack schemas in the objects. // Return the computation and execution domains. return doms; @@ -591,7 +599,7 @@ mddm_t bli_gemm_md_rrr doms.comp = BLIS_REAL; doms.exec = BLIS_REAL; - // Use the default pack schemas in the context. + // Use the default pack schemas in the objects. // Return the computation and execution domains. return doms; @@ -621,248 +629,10 @@ mddm_t bli_gemm_md_ccc doms.comp = BLIS_COMPLEX; doms.exec = BLIS_COMPLEX; - // Use the default pack schemas in the context. + // Use the default pack schemas in the objects. // Return the computation and execution domains. return doms; } -// ----------------------------------------------------------------------------- - -#if 0 -void bli_gemm_md_front - ( - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c, - cntx_t* cntx, - rntm_t* rntm, - cntl_t* cntl - ) -{ - bli_init_once(); - - obj_t a_local; - obj_t b_local; - obj_t c_local; - - // Check parameters. - if ( bli_error_checking_is_enabled() ) - bli_gemm_check( alpha, a, b, beta, c, cntx ); - - // If alpha is zero, scale by beta and return. - if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) - { - bli_scalm( beta, c ); - return; - } - - // Alias A, B, and C in case we need to apply transformations. - bli_obj_alias_to( a, &a_local ); - bli_obj_alias_to( b, &b_local ); - bli_obj_alias_to( c, &c_local ); - - // An optimization: If C is stored by rows and the micro-kernel prefers - // contiguous columns, or if C is stored by columns and the micro-kernel - // prefers contiguous rows, transpose the entire operation to allow the - // micro-kernel to access elements of C in its preferred manner. 
- if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) - { - bli_obj_swap( &a_local, &b_local ); - - bli_obj_induce_trans( &a_local ); - bli_obj_induce_trans( &b_local ); - bli_obj_induce_trans( &c_local ); - } - - cntx_t cntx_local; - - // Handle mixed domain cases in bli_gemm_md(), which may modify - // the objects or the context. (If the context is modified, cntx - // is adjusted to point to cntx_local.) - bli_gemm_md( &a_local, &b_local, beta, &c_local, &cntx_local, &cntx ); - - // Record the threading for each level within the context. - bli_rntm_set_ways_for_op - ( - BLIS_GEMM, - BLIS_LEFT, // ignored for gemm/hemm/symm - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ), - rntm - ); - - // Invoke the internal back-end via the thread handler. - bli_l3_thread_decorator - ( - bli_gemm_int, - BLIS_GEMM, // operation family id - alpha, - &a_local, - &b_local, - beta, - &c_local, - cntx, - rntm, - cntl - ); -} - -// ----------------------------------------------------------------------------- - -void bli_gemm_md_zgemm - ( - obj_t* alpha, - obj_t* a, - obj_t* b, - obj_t* beta, - obj_t* c, - cntx_t* cntx, - rntm_t* rntm, - cntl_t* cntl - ) -{ - bli_init_once(); - - obj_t a_local; - obj_t b_local; - obj_t c_local; - -#if 1 - obj_t am, bm, cm; - obj_t* c_orig; - - //if ( is_md == TRUE ) - { - //num_t dt_c2 = bli_obj_dt( c ); - //num_t dt_c1 = bli_dt_proj_to_complex( dt_c2 ); - //num_t dt_c = bli_dt_proj_to_double_prec( dt_c1 ); - //num_t dt_c = bli_obj_dt_proj_to_complex( c ); - num_t dt_c = BLIS_DCOMPLEX; - - if ( bli_obj_is_single_prec( c ) ) dt_c = BLIS_SCOMPLEX; - else dt_c = BLIS_DCOMPLEX; - - if ( bli_obj_is_real( a ) && - bli_obj_is_real( b ) && - bli_obj_is_real( c ) ) dt_c = bli_dt_proj_to_real( dt_c ); - - dim_t m = bli_obj_length( c ); - dim_t n = bli_obj_width( c ); - dim_t k = bli_obj_width_after_trans( a ); - - bli_obj_create( dt_c, m, k, 0, 0, &am ); - bli_obj_create( dt_c, k, n, 0, 0, &bm ); 
- bli_obj_create( dt_c, m, n, 0, 0, &cm ); - - //bli_projm( a, &am ); - //bli_projm( b, &bm ); - //bli_projm( c, &cm ); - bli_castm( a, &am ); - bli_castm( b, &bm ); - bli_castm( c, &cm ); - - c_orig = c; - - a = &am; - b = &bm; - c = &cm; - } -#endif - - // Check parameters. - if ( bli_error_checking_is_enabled() ) - bli_gemm_check( alpha, a, b, beta, c, cntx ); - - // If alpha is zero, scale by beta and return. - if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) - { - bli_scalm( beta, c ); - return; - } - - // Alias A, B, and C in case we need to apply transformations. - bli_obj_alias_to( a, &a_local ); - bli_obj_alias_to( b, &b_local ); - bli_obj_alias_to( c, &c_local ); - - // An optimization: If C is stored by rows and the micro-kernel prefers - // contiguous columns, or if C is stored by columns and the micro-kernel - // prefers contiguous rows, transpose the entire operation to allow the - // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) - { - bli_obj_swap( &a_local, &b_local ); - - bli_obj_induce_trans( &a_local ); - bli_obj_induce_trans( &b_local ); - bli_obj_induce_trans( &c_local ); - } - - { - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). 
- if ( bli_cntx_method( cntx ) == BLIS_NAT ) - { - bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); - bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); - } - else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) - { - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &b_local ); - } - } - - // Parse and interpret the contents of the rntm_t object to properly - // set the ways of parallelism for each loop, and then make any - // additional modifications necessary for the current operation. - bli_rntm_set_ways_for_op - ( - BLIS_GEMM, - BLIS_LEFT, // ignored for gemm/hemm/symm - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ), - rntm - ); - - // Invoke the internal back-end via the thread handler. - bli_l3_thread_decorator - ( - bli_gemm_int, - BLIS_GEMM, // operation family id - alpha, - &a_local, - &b_local, - beta, - &c_local, - cntx, - rntm, - cntl - ); - -#if 1 - //if ( is_md == TRUE ) - { - //bli_projm( &cm, c_orig ); - bli_castm( &cm, c_orig ); - - bli_obj_free( &am ); - bli_obj_free( &bm ); - bli_obj_free( &cm ); - } -#endif -} -#endif - #endif diff --git a/frame/3/gemmt/bli_gemmt_front.c b/frame/3/gemmt/bli_gemmt_front.c index d652618cb..21db12d26 100644 --- a/frame/3/gemmt/bli_gemmt_front.c +++ b/frame/3/gemmt/bli_gemmt_front.c @@ -92,6 +92,9 @@ void bli_gemmt_front bli_obj_induce_trans( &c_local ); } + // Set the pack schemas within the objects, as appropriate. + bli_l3_set_schemas( &a_local, &b_local, &c_local, cntx ); + // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. 
@@ -105,17 +108,6 @@ void bli_gemmt_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &b_local ); - // Invoke the internal back-end via the thread handler. bli_l3_thread_decorator ( diff --git a/frame/3/hemm/bli_hemm_front.c b/frame/3/hemm/bli_hemm_front.c index d1746eb4e..12c60bd39 100644 --- a/frame/3/hemm/bli_hemm_front.c +++ b/frame/3/hemm/bli_hemm_front.c @@ -130,6 +130,9 @@ void bli_hemm_front } #endif + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &b_local, &c_local, cntx ); + // Set each alias as the root object. // NOTE: We MUST wait until we are done potentially swapping the objects // before setting the root fields! @@ -150,17 +153,6 @@ void bli_hemm_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &b_local ); - // Invoke the internal back-end. 
bli_l3_thread_decorator ( diff --git a/frame/3/her2k/bli_her2k_front.c b/frame/3/her2k/bli_her2k_front.c index 096ea463b..9fe6f4584 100644 --- a/frame/3/her2k/bli_her2k_front.c +++ b/frame/3/her2k/bli_her2k_front.c @@ -83,12 +83,6 @@ void bli_her2k_front bli_obj_induce_trans( &ah_local ); bli_obj_toggle_conj( &ah_local ); - // Initialize a conjugated copy of alpha. - bli_obj_scalar_init_detached_copy_of( bli_obj_dt( a ), - BLIS_CONJUGATE, - alpha, - &alpha_conj ); - // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the @@ -106,6 +100,16 @@ void bli_her2k_front bli_obj_induce_trans( &c_local ); } + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &bh_local, &c_local, cntx ); + bli_l3_set_schemas( &b_local, &ah_local, &c_local, cntx ); + + // Initialize a conjugated copy of alpha. + bli_obj_scalar_init_detached_copy_of( bli_obj_dt( a ), + BLIS_CONJUGATE, + alpha, + &alpha_conj ); + // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. @@ -119,19 +123,6 @@ void bli_her2k_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). 
- pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &bh_local ); - bli_obj_set_pack_schema( schema_a, &b_local ); - bli_obj_set_pack_schema( schema_b, &ah_local ); - // Invoke herk twice, using beta only the first time. // Invoke the internal back-end. diff --git a/frame/3/herk/bli_herk_front.c b/frame/3/herk/bli_herk_front.c index a88d23e90..da159257b 100644 --- a/frame/3/herk/bli_herk_front.c +++ b/frame/3/herk/bli_herk_front.c @@ -86,6 +86,9 @@ void bli_herk_front bli_obj_induce_trans( &c_local ); } + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &ah_local, &c_local, cntx ); + // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. @@ -99,17 +102,6 @@ void bli_herk_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &ah_local ); - // Invoke the internal back-end. bli_l3_thread_decorator ( diff --git a/frame/3/symm/bli_symm_front.c b/frame/3/symm/bli_symm_front.c index 61238fb15..5fcf230b2 100644 --- a/frame/3/symm/bli_symm_front.c +++ b/frame/3/symm/bli_symm_front.c @@ -129,6 +129,9 @@ void bli_symm_front } #endif + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &b_local, &c_local, cntx ); + // Set each alias as the root object. 
// NOTE: We MUST wait until we are done potentially swapping the objects // before setting the root fields! @@ -149,17 +152,6 @@ void bli_symm_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &b_local ); - // Invoke the internal back-end. bli_l3_thread_decorator ( diff --git a/frame/3/syr2k/bli_syr2k_front.c b/frame/3/syr2k/bli_syr2k_front.c index c1532b92d..87f88f753 100644 --- a/frame/3/syr2k/bli_syr2k_front.c +++ b/frame/3/syr2k/bli_syr2k_front.c @@ -87,6 +87,10 @@ void bli_syr2k_front bli_obj_induce_trans( &c_local ); } + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &bt_local, &c_local, cntx ); + bli_l3_set_schemas( &b_local, &at_local, &c_local, cntx ); + // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. @@ -100,19 +104,6 @@ void bli_syr2k_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). 
- pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &bt_local ); - bli_obj_set_pack_schema( schema_a, &b_local ); - bli_obj_set_pack_schema( schema_b, &at_local ); - // Invoke herk twice, using beta only the first time. // Invoke the internal back-end. diff --git a/frame/3/syrk/bli_syrk_front.c b/frame/3/syrk/bli_syrk_front.c index 14c5d4a3d..6b91fea0d 100644 --- a/frame/3/syrk/bli_syrk_front.c +++ b/frame/3/syrk/bli_syrk_front.c @@ -89,6 +89,9 @@ void bli_syrk_front bli_obj_induce_trans( &c_local ); } + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &at_local, &c_local, cntx ); + // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. @@ -102,17 +105,6 @@ void bli_syrk_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &at_local ); - // Invoke the internal back-end. bli_l3_thread_decorator ( diff --git a/frame/3/trmm/bli_trmm_front.c b/frame/3/trmm/bli_trmm_front.c index 63fc8053f..08a4ace88 100644 --- a/frame/3/trmm/bli_trmm_front.c +++ b/frame/3/trmm/bli_trmm_front.c @@ -148,6 +148,9 @@ void bli_trmm_front #endif + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &b_local, &c_local, cntx ); + // Set each alias as the root object. 
// NOTE: We MUST wait until we are done potentially swapping the objects // before setting the root fields! @@ -168,17 +171,6 @@ void bli_trmm_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &b_local ); - // Invoke the internal back-end. bli_l3_thread_decorator ( diff --git a/frame/3/trmm3/bli_trmm3_front.c b/frame/3/trmm3/bli_trmm3_front.c index ba7d3a91f..126cd8de4 100644 --- a/frame/3/trmm3/bli_trmm3_front.c +++ b/frame/3/trmm3/bli_trmm3_front.c @@ -140,6 +140,9 @@ void bli_trmm3_front #endif + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &b_local, &c_local, cntx ); + // Set each alias as the root object. // NOTE: We MUST wait until we are done potentially swapping the objects // before setting the root fields! @@ -160,17 +163,6 @@ void bli_trmm3_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &b_local ); - // Invoke the internal back-end. 
bli_l3_thread_decorator ( diff --git a/frame/3/trsm/bli_trsm_front.c b/frame/3/trsm/bli_trsm_front.c index 77c177d8a..3533d1869 100644 --- a/frame/3/trsm/bli_trsm_front.c +++ b/frame/3/trsm/bli_trsm_front.c @@ -122,6 +122,9 @@ void bli_trsm_front #endif + // Set the pack schemas within the objects. + bli_l3_set_schemas( &a_local, &b_local, &c_local, cntx ); + // Set each alias as the root object. // NOTE: We MUST wait until we are done potentially swapping the objects // before setting the root fields! @@ -142,17 +145,6 @@ void bli_trsm_front rntm ); - // A sort of hack for communicating the desired pach schemas for A and B - // to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and - // bli_l3_cntl_create_if()). This allows us to access the schemas from - // the control tree, which hopefully reduces some confusion, particularly - // in bli_packm_init(). - pack_t schema_a = bli_cntx_schema_a_block( cntx ); - pack_t schema_b = bli_cntx_schema_b_panel( cntx ); - - bli_obj_set_pack_schema( schema_a, &a_local ); - bli_obj_set_pack_schema( schema_b, &b_local ); - // Invoke the internal back-end. bli_l3_thread_decorator ( diff --git a/frame/base/bli_cntx.c b/frame/base/bli_cntx.c index 82952cc28..7c408ce8e 100644 --- a/frame/base/bli_cntx.c +++ b/frame/base/bli_cntx.c @@ -323,13 +323,14 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) // ----------------------------------------------------------------------------- -void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ) +void bli_cntx_set_ind_blkszs( ind_t method, num_t dt, dim_t n_bs, ... ) { /* Example prototypes: void bli_gks_cntx_set_ind_blkszs ( ind_t method != BLIS_NAT, + num_t dt, dim_t n_bs, bszid_t bs0_id, dim_t def_scalr0, dim_t max_scalr0, bszid_t bs1_id, dim_t def_scalr1, dim_t max_scalr1, @@ -346,6 +347,9 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ) dim_t i; err_t r_val; + // Project the given datatype to the real domain. This will be used later on. 
+ num_t dt_real = bli_dt_proj_to_real( dt ); + // Return early if called with BLIS_NAT. if ( method == BLIS_NAT ) return; @@ -427,19 +431,17 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ) blksz_t* cntx_blksz = bli_cntx_get_blksz( bs_id, cntx ); blksz_t* cntx_bmult = bli_cntx_get_bmult( bs_id, cntx ); - // Copy the real domain values of the blksz_t object into the - // the complex domain slots of the same object. - bli_blksz_copy_dt( BLIS_FLOAT, cntx_blksz, BLIS_SCOMPLEX, cntx_blksz ); - bli_blksz_copy_dt( BLIS_DOUBLE, cntx_blksz, BLIS_DCOMPLEX, cntx_blksz ); + // Copy the real domain value of the blksz_t object into the + // corresponding complex domain slot of the same object. + bli_blksz_copy_dt( dt_real, cntx_blksz, dt, cntx_blksz ); // If the default blocksize scalar is non-unit, we need to scale // the complex domain default blocksizes. if ( dsclr != 1.0 ) { - // Scale the complex domain default blocksize values in the - // blocksize object. - bli_blksz_scale_def( 1, ( dim_t )dsclr, BLIS_SCOMPLEX, cntx_blksz ); - bli_blksz_scale_def( 1, ( dim_t )dsclr, BLIS_DCOMPLEX, cntx_blksz ); + // Scale the default blocksize value corresponding to the given + // datatype. + bli_blksz_scale_def( 1, ( dim_t )dsclr, dt, cntx_blksz ); // Perform rounding to ensure the newly scaled values are still // multiples of their register blocksize multiples. But only @@ -450,9 +452,8 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ) // such rounding. if ( bs_id != bm_id && method != BLIS_1M ) { - // Round the newly-scaled blocksizes down to their multiple. - bli_blksz_reduce_def_to( BLIS_FLOAT, cntx_bmult, BLIS_SCOMPLEX, cntx_blksz ); - bli_blksz_reduce_def_to( BLIS_DOUBLE, cntx_bmult, BLIS_DCOMPLEX, cntx_blksz ); + // Round the newly-scaled blocksize down to its multiple. + bli_blksz_reduce_def_to( dt_real, cntx_bmult, dt, cntx_blksz ); } } @@ -460,10 +461,9 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... 
) // to scale the complex domain maximum blocksizes. if ( msclr != 1.0 ) { - // Scale the complex domain maximum blocksize values in the - // blocksize object. - bli_blksz_scale_max( 1, ( dim_t )msclr, BLIS_SCOMPLEX, cntx_blksz ); - bli_blksz_scale_max( 1, ( dim_t )msclr, BLIS_DCOMPLEX, cntx_blksz ); + // Scale the maximum blocksize value corresponding to the given + // datatype. + bli_blksz_scale_max( 1, ( dim_t )msclr, dt, cntx_blksz ); // Perform rounding to ensure the newly scaled values are still // multiples of their register blocksize multiples. But only @@ -474,9 +474,8 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ) // such rounding. if ( bs_id != bm_id && method != BLIS_1M ) { - // Round the newly-scaled blocksizes down to their multiple. - bli_blksz_reduce_max_to( BLIS_FLOAT, cntx_bmult, BLIS_SCOMPLEX, cntx_blksz ); - bli_blksz_reduce_max_to( BLIS_DOUBLE, cntx_bmult, BLIS_DCOMPLEX, cntx_blksz ); + // Round the newly-scaled blocksize down to their multiple. + bli_blksz_reduce_max_to( dt_real, cntx_bmult, dt, cntx_blksz ); } } } diff --git a/frame/base/bli_cntx.h b/frame/base/bli_cntx.h index 998658d3b..76350f6bc 100644 --- a/frame/base/bli_cntx.h +++ b/frame/base/bli_cntx.h @@ -63,9 +63,6 @@ typedef struct cntx_s func_t* unpackm_kers; ind_t method; - pack_t schema_a; - pack_t schema_b; - pack_t schema_c; } cntx_t; */ @@ -136,18 +133,6 @@ BLIS_INLINE ind_t bli_cntx_method( cntx_t* cntx ) { return cntx->method; } -BLIS_INLINE pack_t bli_cntx_schema_a_block( cntx_t* cntx ) -{ - return cntx->schema_a_block; -} -BLIS_INLINE pack_t bli_cntx_schema_b_panel( cntx_t* cntx ) -{ - return cntx->schema_b_panel; -} -BLIS_INLINE pack_t bli_cntx_schema_c_panel( cntx_t* cntx ) -{ - return cntx->schema_c_panel; -} // ----------------------------------------------------------------------------- @@ -159,23 +144,6 @@ BLIS_INLINE void bli_cntx_set_method( ind_t method, cntx_t* cntx ) { cntx->method = method; } -BLIS_INLINE void bli_cntx_set_schema_a_block( 
pack_t schema, cntx_t* cntx ) -{ - cntx->schema_a_block = schema; -} -BLIS_INLINE void bli_cntx_set_schema_b_panel( pack_t schema, cntx_t* cntx ) -{ - cntx->schema_b_panel = schema; -} -BLIS_INLINE void bli_cntx_set_schema_c_panel( pack_t schema, cntx_t* cntx ) -{ - cntx->schema_c_panel = schema; -} -BLIS_INLINE void bli_cntx_set_schema_ab_blockpanel( pack_t sa, pack_t sb, cntx_t* cntx ) -{ - bli_cntx_set_schema_a_block( sa, cntx ); - bli_cntx_set_schema_b_panel( sb, cntx ); -} // ----------------------------------------------------------------------------- @@ -735,7 +703,7 @@ BLIS_EXPORT_BLIS void bli_cntx_clear( cntx_t* cntx ); BLIS_EXPORT_BLIS void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ); -BLIS_EXPORT_BLIS void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ); +BLIS_EXPORT_BLIS void bli_cntx_set_ind_blkszs( ind_t method, num_t dt, dim_t n_bs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_vir_ukrs( dim_t n_ukrs, ... ); diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index c45ffcf84..c250191fc 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -50,7 +50,7 @@ static void_fp cntx_ref_init[ BLIS_NUM_ARCHS ]; // Define a function pointer type for context initialization functions. typedef void (*nat_cntx_init_ft)( cntx_t* cntx ); typedef void (*ref_cntx_init_ft)( cntx_t* cntx ); -typedef void (*ind_cntx_init_ft)( ind_t method, num_t dt, cntx_t* cntx ); +typedef void (*ind_cntx_init_ft)( ind_t method, cntx_t* cntx ); // ----------------------------------------------------------------------------- @@ -582,7 +582,7 @@ cntx_t* bli_gks_query_ind_cntx // function for the current induced method. (That function assumes // that the context is pre- initialized with values for native // execution.) 
- f( ind, dt, gks_id_ind ); + f( ind, gks_id_ind ); } } // END CRITICAL SECTION diff --git a/frame/include/bli_arch_config_pre.h b/frame/include/bli_arch_config_pre.h index 1ab0561d8..86c599230 100644 --- a/frame/include/bli_arch_config_pre.h +++ b/frame/include/bli_arch_config_pre.h @@ -69,7 +69,6 @@ void PASTEMAC2(cntx_init_,archname,BLIS_REF_SUFFIX) \ void PASTEMAC2(cntx_init_,archname,BLIS_IND_SUFFIX) \ ( \ ind_t method, \ - num_t dt, \ cntx_t* cntx \ ); diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 677022668..fe030f193 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -1523,9 +1523,6 @@ typedef struct cntx_s func_t unpackm_kers[ BLIS_NUM_UNPACKM_KERS ]; ind_t method; - pack_t schema_a_block; - pack_t schema_b_panel; - pack_t schema_c_panel; } cntx_t; diff --git a/frame/ind/cntx/bli_cntx_ind_stage.c b/frame/ind/cntx/bli_cntx_ind_stage.c index b5c15d5d7..0b315d215 100644 --- a/frame/ind/cntx/bli_cntx_ind_stage.c +++ b/frame/ind/cntx/bli_cntx_ind_stage.c @@ -74,18 +74,18 @@ void bli_cntx_3mh_stage( dim_t stage, cntx_t* cntx ) // Set the pack_t schemas as a function of the stage of execution. 
if ( stage == 0 ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); } else if ( stage == 1 ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); } else // if ( stage == 2 ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RPI, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RPI, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RPI, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RPI, cntx ); } } @@ -102,23 +102,23 @@ void bli_cntx_4mh_stage( dim_t stage, cntx_t* cntx ) // Set the pack_t schemas as a function of the stage of execution. 
if ( stage == 0 ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); } else if ( stage == 1 ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); } else if ( stage == 2 ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); } else // if ( stage == 3 ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); } } diff --git a/kernels/armsve/1m/bli_dpackm_armsve512_asm_10xk.c b/kernels/armsve/1m/bli_dpackm_armsve512_asm_10xk.c index 851363a9e..44718fa57 100644 --- a/kernels/armsve/1m/bli_dpackm_armsve512_asm_10xk.c +++ b/kernels/armsve/1m/bli_dpackm_armsve512_asm_10xk.c @@ -64,12 +64,11 @@ void bli_dpackm_armsve512_asm_10xk const bool unitk = bli_deq1( *kappa ); #ifdef _A64FX - if ( bli_cntx_schema_a_block(cntx) != bli_cntx_schema_b_panel(cntx) ) { - // A twisted way to infer whether A or B is being packed. - if ( schema == bli_cntx_schema_a_block(cntx) ) + // Infer whether A or B is being packed. 
+ if ( schema == BLIS_PACKED_ROWS ) p = ( (uint64_t)0x1 << 56 ) | (uint64_t)p; - if ( schema == bli_cntx_schema_b_panel(cntx) ) + if ( schema == BLIS_PACKED_COLUMNS ) p = ( (uint64_t)0x2 << 56 ) | (uint64_t)p; } #endif diff --git a/kernels/armsve/1m/bli_dpackm_armsve512_asm_16xk.c b/kernels/armsve/1m/bli_dpackm_armsve512_asm_16xk.c index 38fb0b912..f02b87a7a 100644 --- a/kernels/armsve/1m/bli_dpackm_armsve512_asm_16xk.c +++ b/kernels/armsve/1m/bli_dpackm_armsve512_asm_16xk.c @@ -63,12 +63,11 @@ void bli_dpackm_armsve512_asm_16xk const bool unitk = bli_deq1( *kappa ); #ifdef _A64FX - if ( bli_cntx_schema_a_block(cntx) != bli_cntx_schema_b_panel(cntx) ) { - // A twisted way to infer whether A or B is being packed. - if ( schema == bli_cntx_schema_a_block(cntx) ) + // Infer whether A or B is being packed. + if ( schema == BLIS_PACKED_ROWS ) p = ( (uint64_t)0x1 << 56 ) | (uint64_t)p; - if ( schema == bli_cntx_schema_b_panel(cntx) ) + if ( schema == BLIS_PACKED_COLUMNS ) p = ( (uint64_t)0x2 << 56 ) | (uint64_t)p; } #endif diff --git a/ref_kernels/bli_cntx_ref.c b/ref_kernels/bli_cntx_ref.c index 29e5de95c..e1db540b0 100644 --- a/ref_kernels/bli_cntx_ref.c +++ b/ref_kernels/bli_cntx_ref.c @@ -334,7 +334,14 @@ PASTEMAC(c,opname), PASTEMAC(z,opname) ); \ } +// -- Helper function for 1m --------------------------------------------------- +void GENBAINAME(cntx_init_blkszs) + ( + ind_t method, + num_t dt, + cntx_t* cntx + ); // ----------------------------------------------------------------------------- @@ -589,10 +596,6 @@ void GENBARNAME(cntx_init) // -- Set miscellaneous fields --------------------------------------------- bli_cntx_set_method( BLIS_NAT, cntx ); - - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS, cntx ); - bli_cntx_set_schema_c_panel( BLIS_NOT_PACKED, cntx ); } // ----------------------------------------------------------------------------- @@ -600,7 +603,6 @@ void GENBARNAME(cntx_init) 
void GENBAINAME(cntx_init) ( ind_t method, - num_t dt, cntx_t* cntx ) { @@ -826,78 +828,12 @@ void GENBAINAME(cntx_init) } else if ( method == BLIS_1M ) { - const bool is_pb = FALSE; + //const bool is_pb = FALSE; - // We MUST set the induced method in the context prior to calling - // bli_cntx_l3_ukr_prefers_cols_dt() because that function queries - // the induced method. It needs the induced method value in order - // to determine whether to evaluate the "prefers column storage" - // predicate using the storage preference of the kernel for dt, or - // the storage preference of the kernel for the real projection of - // dt. Failing to set the induced method here can lead to strange - // undefined behavior at runtime if the native complex kernel's - // storage preference happens to not equal that of the native real - // kernel. - bli_cntx_set_method( method, cntx ); - - // Initialize the blocksizes according to the micro-kernel preference as - // well as the algorithm. - if ( bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ) ) - { - // This branch is used for algorithms 1m_c_bp, 1m_r_pb. - - // Set the pack_t schemas for the c_bp or r_pb algorithms. - if ( !is_pb ) - { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_1E, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_1R, cntx ); - } - else // if ( is_pb ) - { - bli_cntx_set_schema_b_panel( BLIS_PACKED_ROW_PANELS_1R, cntx ); - bli_cntx_set_schema_a_block( BLIS_PACKED_COL_PANELS_1E, cntx ); - } - - bli_cntx_set_ind_blkszs - ( - method, 6, - BLIS_NC, 1.0, 1.0, - BLIS_KC, 2.0, 2.0, // halve kc... - BLIS_MC, 2.0, 2.0, // halve mc... - BLIS_NR, 1.0, 1.0, - BLIS_MR, 2.0, 1.0, // ...and mr (but NOT packmr) - BLIS_KR, 1.0, 1.0, - cntx - ); - } - else // if ( bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, BLIS_GEMM_UKR, cntx ) ) - { - // This branch is used for algorithms 1m_r_bp, 1m_c_pb. - - // Set the pack_t schemas for the r_bp or c_pb algorithms. 
- if ( !is_pb ) - { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_1R, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_1E, cntx ); - } - else // if ( is_pb ) - { - bli_cntx_set_schema_b_panel( BLIS_PACKED_ROW_PANELS_1E, cntx ); - bli_cntx_set_schema_a_block( BLIS_PACKED_COL_PANELS_1R, cntx ); - } - - bli_cntx_set_ind_blkszs - ( - method, 6, - BLIS_NC, 2.0, 2.0, // halve nc... - BLIS_KC, 2.0, 2.0, // halve kc... - BLIS_MC, 1.0, 1.0, - BLIS_NR, 2.0, 1.0, // ...and nr (but NOT packnr) - BLIS_MR, 1.0, 1.0, - BLIS_KR, 1.0, 1.0, - cntx - ); - } + // Call a helper function to initialize blocksizes for each complex + // datatype. + GENBAINAME(cntx_init_blkszs)( method, BLIS_SCOMPLEX, cntx ); + GENBAINAME(cntx_init_blkszs)( method, BLIS_DCOMPLEX, cntx ); } else // if ( method == BLIS_NAT ) { @@ -913,8 +849,8 @@ void GENBAINAME(cntx_init) } else if ( method == BLIS_3M1 ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_3MI, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_3MI, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_3MI, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_3MI, cntx ); } else if ( method == BLIS_4MH ) { @@ -922,8 +858,8 @@ void GENBAINAME(cntx_init) } else if ( method == BLIS_4M1A || method == BLIS_4M1B ) { - bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_4MI, cntx ); - bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_4MI, cntx ); + //bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_4MI, cntx ); + //bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_4MI, cntx ); } else if ( method == BLIS_1M ) { @@ -942,3 +878,60 @@ void GENBAINAME(cntx_init) } } +// ----------------------------------------------------------------------------- + +void GENBAINAME(cntx_init_blkszs) + ( + ind_t method, + num_t dt, + cntx_t* cntx + ) +{ + // We MUST set the induced method in the context prior to calling + // bli_cntx_l3_vir_ukr_prefers_cols_dt() because that function queries + // the induced 
method. That function needs the induced method value in + // order to determine whether to evaluate the "prefers column storage" + // predicate using the storage preference of the kernel for dt, or + // the storage preference of the kernel for the real projection of + // dt. Failing to set the induced method here can lead to strange + // undefined behavior at runtime if the native complex kernel's + // storage preference happens to not equal that of the native real + // kernel. + bli_cntx_set_method( method, cntx ); + + // Initialize the blocksizes according to the micro-kernel preference as + // well as the algorithm. + if ( bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ) ) + { + // This branch is used for algorithm 1m_c_bp. + + bli_cntx_set_ind_blkszs + ( + method, dt, 6, + BLIS_NC, 1.0, 1.0, + BLIS_KC, 2.0, 2.0, // halve kc... + BLIS_MC, 2.0, 2.0, // halve mc... + BLIS_NR, 1.0, 1.0, + BLIS_MR, 2.0, 1.0, // ...and mr (but NOT packmr) + BLIS_KR, 1.0, 1.0, + cntx + ); + } + else // if ( bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, BLIS_GEMM_UKR, cntx ) ) + { + // This branch is used for algorithm 1m_r_bp. + + bli_cntx_set_ind_blkszs + ( + method, dt, 6, + BLIS_NC, 2.0, 2.0, // halve nc... + BLIS_KC, 2.0, 2.0, // halve kc... 
+ BLIS_MC, 1.0, 1.0, + BLIS_NR, 2.0, 1.0, // ...and nr (but NOT packnr) + BLIS_MR, 1.0, 1.0, + BLIS_KR, 1.0, 1.0, + cntx + ); + } +} + diff --git a/ref_kernels/ind/bli_gemmtrsm1m_ref.c b/ref_kernels/ind/bli_gemmtrsm1m_ref.c index 7def665de..5cfaee9ec 100644 --- a/ref_kernels/ind/bli_gemmtrsm1m_ref.c +++ b/ref_kernels/ind/bli_gemmtrsm1m_ref.c @@ -78,7 +78,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ \ const dim_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \ \ - const pack_t schema_b = bli_cntx_schema_b_panel( cntx ); \ + const pack_t schema_b = bli_auxinfo_schema_b( data ); \ \ const dim_t k2 = 2 * k; \ \ diff --git a/ref_kernels/ind/bli_trsm1m_ref.c b/ref_kernels/ind/bli_trsm1m_ref.c index a89d8b90d..68717f7a6 100644 --- a/ref_kernels/ind/bli_trsm1m_ref.c +++ b/ref_kernels/ind/bli_trsm1m_ref.c @@ -67,7 +67,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ const inc_t ld_a = cs_a; \ const inc_t ld_b = rs_b; \ \ - const pack_t schema_b = bli_cntx_schema_b_panel( cntx ); \ + const pack_t schema_b = bli_auxinfo_schema_b( data ); \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ @@ -277,7 +277,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ const inc_t ld_a = cs_a; \ const inc_t ld_b = rs_b; \ \ - const pack_t schema_b = bli_cntx_schema_b_panel( cntx ); \ + const pack_t schema_b = bli_auxinfo_schema_b( data ); \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ diff --git a/testsuite/src/test_gemm.c b/testsuite/src/test_gemm.c index f485829a1..65f910f9b 100644 --- a/testsuite/src/test_gemm.c +++ b/testsuite/src/test_gemm.c @@ -447,9 +447,12 @@ void libblis_test_gemm_impl #if 0 //bli_printm( "alpha", alpha, "%5.2f", "" ); //bli_printm( "beta", beta, "%5.2f", "" ); +if ( bli_obj_dt( c ) == BLIS_DCOMPLEX ) +{ bli_printm( "a", a, "%5.2f", "" ); bli_printm( "b", b, "%5.2f", "" ); bli_printm( "c", c, "%5.2f", "" ); +} #endif //if ( bli_obj_length( b ) == 16 && // bli_obj_stor3_from_strides( c, a, b ) == BLIS_CRR ) @@ -457,8 +460,7 @@ bli_printm( "c", c, "%5.2f", "" ); bli_gemm( alpha, a, b, 
beta, c ); //bls_gemm( alpha, a, b, beta, c ); #if 0 -if ( bli_obj_length( c ) == 12 && - bli_obj_stor3_from_strides( c, a, b ) == BLIS_RRR ) +if ( bli_obj_dt( c ) == BLIS_DCOMPLEX ) bli_printm( "c after", c, "%6.3f", "" ); #endif //bli_printm( "c after", c, "%5.2f", "" ); diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index a8ffb6d59..f5bfd0f72 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -1790,8 +1790,8 @@ void libblis_test_op_driver } } - // Enumerate all combinations of datatype domains requested, but only - // for the gemm operation. + // Enumerate all combinations of datatypes requested, but only for the + // gemm operation. if ( !mixed_domain && mixed_precision && op->opid == BLIS_GEMM ) {