mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Renamed 3m and 4m symbols/macros to 3mi and 4mi.
Details: - Renamed several variables and macros from 3m/4m to 3mi/4mi. This is because those packing schemas were always implicitly "interleaved". This new naming scheme will make way for new schemas that separate, instead of interleave, the real and imaginary (and summed) parts. - Expanded the pack format sub-field of the pack schema field of the info_t to 4 bits (from 3). This will allow for more schema types going forward. - Removed old _cntl.c files for herk3m, herk4m, trmm3m, trmm4m.
This commit is contained in:
@@ -152,8 +152,8 @@ void bli_packm_blk_var2( obj_t* c,
|
||||
|
||||
|
||||
// Choose the correct func_t object based on the pack_t schema.
|
||||
if ( bli_is_4m_packed( schema ) ) packm_kers = packm_struc_cxk_4m_kers;
|
||||
else if ( bli_is_3m_packed( schema ) ) packm_kers = packm_struc_cxk_3m_kers;
|
||||
if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4m_kers;
|
||||
else if ( bli_is_3mi_packed( schema ) ) packm_kers = packm_struc_cxk_3m_kers;
|
||||
else if ( bli_is_ro_packed( schema ) ||
|
||||
bli_is_io_packed( schema ) ||
|
||||
bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers;
|
||||
@@ -330,7 +330,7 @@ void PASTEMAC(ch,varname)( \
|
||||
1/2. In both cases, we are compensating for the fact that pointer
|
||||
arithmetic occurs in terms of complex elements rather than real
|
||||
elements. */ \
|
||||
if ( bli_is_3m_packed( schema ) ) { ss_num = 3; ss_den = 2; } \
|
||||
if ( bli_is_3mi_packed( schema ) ) { ss_num = 3; ss_den = 2; } \
|
||||
else if ( bli_is_rih_packed( schema ) ) { ss_num = 1; ss_den = 2; } \
|
||||
else { ss_num = 1; ss_den = 1; } \
|
||||
\
|
||||
|
||||
@@ -362,7 +362,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
// Why? Because the macro-kernel indexes in units of the complex
|
||||
// datatype. So these changes "trick" it into indexing the correct
|
||||
// amount.
|
||||
if ( bli_is_3m_packed( pack_schema ) )
|
||||
if ( bli_is_3mi_packed( pack_schema ) )
|
||||
{
|
||||
ps_p = ( ps_p * 3 ) / 2;
|
||||
|
||||
@@ -460,7 +460,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
// Why? Because the macro-kernel indexes in units of the complex
|
||||
// datatype. So these changes "trick" it into indexing the correct
|
||||
// amount.
|
||||
if ( bli_is_3m_packed( pack_schema ) )
|
||||
if ( bli_is_3mi_packed( pack_schema ) )
|
||||
{
|
||||
ps_p = ( ps_p * 3 ) / 2;
|
||||
|
||||
|
||||
@@ -134,7 +134,7 @@ void bli_gemm3m_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_3M,
|
||||
BLIS_PACKED_ROW_PANELS_3MI,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
gemm3m_packb_cntl
|
||||
@@ -146,7 +146,7 @@ void bli_gemm3m_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_3M,
|
||||
BLIS_PACKED_COL_PANELS_3MI,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ void bli_gemm4m_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_4M,
|
||||
BLIS_PACKED_ROW_PANELS_4MI,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
gemm4m_packb_cntl
|
||||
@@ -143,7 +143,7 @@ void bli_gemm4m_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_4M,
|
||||
BLIS_PACKED_COL_PANELS_4MI,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
|
||||
@@ -125,7 +125,7 @@ void bli_gemm4mb_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_4M,
|
||||
BLIS_PACKED_ROW_PANELS_4MI,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
gemm4mb_packb_cntl
|
||||
@@ -137,7 +137,7 @@ void bli_gemm4mb_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_4M,
|
||||
BLIS_PACKED_COL_PANELS_4MI,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
|
||||
extern blksz_t* gemm3m_mc;
|
||||
extern blksz_t* gemm3m_nc;
|
||||
extern blksz_t* gemm3m_kc;
|
||||
extern blksz_t* gemm3m_mr;
|
||||
extern blksz_t* gemm3m_nr;
|
||||
extern blksz_t* gemm3m_kr;
|
||||
|
||||
extern func_t* gemm3m_ukrs;
|
||||
|
||||
packm_t* herk3m_packa_cntl;
|
||||
packm_t* herk3m_packb_cntl;
|
||||
|
||||
herk_t* herk3m_cntl_bp_ke;
|
||||
herk_t* herk3m_cntl_op_bp;
|
||||
herk_t* herk3m_cntl_mm_op;
|
||||
herk_t* herk3m_cntl_vl_mm;
|
||||
|
||||
herk_t* herk3m_cntl;
|
||||
|
||||
|
||||
void bli_herk3m_cntl_init()
|
||||
{
|
||||
// Create control tree objects for packm operations.
|
||||
herk3m_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm3m_mr,
|
||||
gemm3m_kr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_3M,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
herk3m_packb_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm3m_kr,
|
||||
gemm3m_nr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_3M,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
herk3m_cntl_bp_ke
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_UNB_OPT,
|
||||
BLIS_VARIANT2,
|
||||
NULL,
|
||||
gemm3m_ukrs,
|
||||
NULL, NULL, NULL,
|
||||
NULL, NULL, NULL );
|
||||
|
||||
// Create control tree object for outer panel (to block-panel)
|
||||
// problem.
|
||||
herk3m_cntl_op_bp
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemm3m_mc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
herk3m_packa_cntl,
|
||||
herk3m_packb_cntl,
|
||||
NULL,
|
||||
herk3m_cntl_bp_ke,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for general problem via multiple
|
||||
// rank-k (outer panel) updates.
|
||||
herk3m_cntl_mm_op
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
gemm3m_kc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
herk3m_cntl_op_bp,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for very large problem via multiple
|
||||
// general problems.
|
||||
herk3m_cntl_vl_mm
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm3m_nc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
herk3m_cntl_mm_op,
|
||||
NULL );
|
||||
|
||||
// Alias the "master" herk control tree to a shorter name.
|
||||
herk3m_cntl = herk3m_cntl_vl_mm;
|
||||
}
|
||||
|
||||
void bli_herk3m_cntl_finalize()
|
||||
{
|
||||
bli_cntl_obj_free( herk3m_packa_cntl );
|
||||
bli_cntl_obj_free( herk3m_packb_cntl );
|
||||
|
||||
bli_cntl_obj_free( herk3m_cntl_bp_ke );
|
||||
bli_cntl_obj_free( herk3m_cntl_op_bp );
|
||||
bli_cntl_obj_free( herk3m_cntl_mm_op );
|
||||
bli_cntl_obj_free( herk3m_cntl_vl_mm );
|
||||
}
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
|
||||
extern blksz_t* gemm4m_mc;
|
||||
extern blksz_t* gemm4m_nc;
|
||||
extern blksz_t* gemm4m_kc;
|
||||
extern blksz_t* gemm4m_mr;
|
||||
extern blksz_t* gemm4m_nr;
|
||||
extern blksz_t* gemm4m_kr;
|
||||
|
||||
extern func_t* gemm4m_ukrs;
|
||||
|
||||
packm_t* herk4m_packa_cntl;
|
||||
packm_t* herk4m_packb_cntl;
|
||||
|
||||
herk_t* herk4m_cntl_bp_ke;
|
||||
herk_t* herk4m_cntl_op_bp;
|
||||
herk_t* herk4m_cntl_mm_op;
|
||||
herk_t* herk4m_cntl_vl_mm;
|
||||
|
||||
herk_t* herk4m_cntl;
|
||||
|
||||
|
||||
void bli_herk4m_cntl_init()
|
||||
{
|
||||
// Create control tree objects for packm operations.
|
||||
herk4m_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm4m_mr,
|
||||
gemm4m_kr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_4M,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
herk4m_packb_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm4m_kr,
|
||||
gemm4m_nr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_4M,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
herk4m_cntl_bp_ke
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_UNB_OPT,
|
||||
BLIS_VARIANT2,
|
||||
NULL,
|
||||
gemm4m_ukrs,
|
||||
NULL, NULL, NULL,
|
||||
NULL, NULL, NULL );
|
||||
|
||||
// Create control tree object for outer panel (to block-panel)
|
||||
// problem.
|
||||
herk4m_cntl_op_bp
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemm4m_mc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
herk4m_packa_cntl,
|
||||
herk4m_packb_cntl,
|
||||
NULL,
|
||||
herk4m_cntl_bp_ke,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for general problem via multiple
|
||||
// rank-k (outer panel) updates.
|
||||
herk4m_cntl_mm_op
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
gemm4m_kc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
herk4m_cntl_op_bp,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for very large problem via multiple
|
||||
// general problems.
|
||||
herk4m_cntl_vl_mm
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm4m_nc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
herk4m_cntl_mm_op,
|
||||
NULL );
|
||||
|
||||
// Alias the "master" herk control tree to a shorter name.
|
||||
herk4m_cntl = herk4m_cntl_vl_mm;
|
||||
}
|
||||
|
||||
void bli_herk4m_cntl_finalize()
|
||||
{
|
||||
bli_cntl_obj_free( herk4m_packa_cntl );
|
||||
bli_cntl_obj_free( herk4m_packb_cntl );
|
||||
|
||||
bli_cntl_obj_free( herk4m_cntl_bp_ke );
|
||||
bli_cntl_obj_free( herk4m_cntl_op_bp );
|
||||
bli_cntl_obj_free( herk4m_cntl_mm_op );
|
||||
bli_cntl_obj_free( herk4m_cntl_vl_mm );
|
||||
}
|
||||
|
||||
@@ -1,258 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
|
||||
extern blksz_t* gemm3m_mc;
|
||||
extern blksz_t* gemm3m_nc;
|
||||
extern blksz_t* gemm3m_kc;
|
||||
extern blksz_t* gemm3m_mr;
|
||||
extern blksz_t* gemm3m_nr;
|
||||
extern blksz_t* gemm3m_kr;
|
||||
|
||||
extern func_t* gemm3m_ukrs;
|
||||
|
||||
extern gemm_t* gemm3m_cntl_bp_ke;
|
||||
|
||||
packm_t* trmm3m_l_packa_cntl;
|
||||
packm_t* trmm3m_l_packb_cntl;
|
||||
|
||||
packm_t* trmm3m_r_packa_cntl;
|
||||
packm_t* trmm3m_r_packb_cntl;
|
||||
|
||||
gemm_t* trmm3m_cntl_bp_ke;
|
||||
|
||||
gemm_t* trmm3m_l_cntl_op_bp;
|
||||
gemm_t* trmm3m_l_cntl_mm_op;
|
||||
gemm_t* trmm3m_l_cntl_vl_mm;
|
||||
|
||||
gemm_t* trmm3m_r_cntl_op_bp;
|
||||
gemm_t* trmm3m_r_cntl_mm_op;
|
||||
gemm_t* trmm3m_r_cntl_vl_mm;
|
||||
|
||||
gemm_t* trmm3m_l_cntl;
|
||||
gemm_t* trmm3m_r_cntl;
|
||||
|
||||
|
||||
void bli_trmm3m_cntl_init()
|
||||
{
|
||||
// Create control tree objects for packm operations (left side).
|
||||
trmm3m_l_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: for consistency with trsm, "k" dim
|
||||
// multiple is set to mr.
|
||||
gemm3m_mr,
|
||||
gemm3m_kr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_3M,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trmm3m_l_packb_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: m dim multiple here must be mr
|
||||
// since "k" dim multiple is set to mr above.
|
||||
gemm3m_kr,
|
||||
gemm3m_nr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_3M,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm operations (right side).
|
||||
trmm3m_r_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: for consistency with trsm, "k" dim
|
||||
// multiple is set to nr.
|
||||
gemm3m_mr,
|
||||
gemm3m_nr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_3M,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trmm3m_r_packb_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: m dim multiple here must be nr
|
||||
// since "k" dim multiple is set to nr above.
|
||||
gemm3m_nr,
|
||||
gemm3m_nr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_3M,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
trmm3m_cntl_bp_ke
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_UNB_OPT,
|
||||
BLIS_VARIANT2,
|
||||
NULL,
|
||||
gemm3m_ukrs,
|
||||
NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL );
|
||||
|
||||
// Create control tree object for outer panel (to block-panel)
|
||||
// problem (left side).
|
||||
trmm3m_l_cntl_op_bp
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemm3m_mc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
trmm3m_l_packa_cntl,
|
||||
trmm3m_l_packb_cntl,
|
||||
NULL,
|
||||
trmm3m_cntl_bp_ke,
|
||||
gemm3m_cntl_bp_ke,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for general problem via multiple
|
||||
// rank-k (outer panel) updates (left side).
|
||||
trmm3m_l_cntl_mm_op
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
gemm3m_kc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm3m_l_cntl_op_bp,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for very large problem via multiple
|
||||
// general problems (left side).
|
||||
trmm3m_l_cntl_vl_mm
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm3m_nc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm3m_l_cntl_mm_op,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for outer panel (to block-panel)
|
||||
// problem (right side).
|
||||
trmm3m_r_cntl_op_bp
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemm3m_mc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
trmm3m_r_packa_cntl,
|
||||
trmm3m_r_packb_cntl,
|
||||
NULL,
|
||||
trmm3m_cntl_bp_ke,
|
||||
gemm3m_cntl_bp_ke,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for general problem via multiple
|
||||
// rank-k (outer panel) updates (right side).
|
||||
trmm3m_r_cntl_mm_op
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
gemm3m_kc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm3m_r_cntl_op_bp,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for very large problem via multiple
|
||||
// general problems (right side).
|
||||
trmm3m_r_cntl_vl_mm
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm3m_nc,
|
||||
gemm3m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm3m_r_cntl_mm_op,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Alias the "master" trmm control trees to shorter names.
|
||||
trmm3m_l_cntl = trmm3m_l_cntl_vl_mm;
|
||||
trmm3m_r_cntl = trmm3m_r_cntl_vl_mm;
|
||||
}
|
||||
|
||||
void bli_trmm3m_cntl_finalize()
|
||||
{
|
||||
bli_cntl_obj_free( trmm3m_l_packa_cntl );
|
||||
bli_cntl_obj_free( trmm3m_l_packb_cntl );
|
||||
bli_cntl_obj_free( trmm3m_r_packa_cntl );
|
||||
bli_cntl_obj_free( trmm3m_r_packb_cntl );
|
||||
|
||||
bli_cntl_obj_free( trmm3m_cntl_bp_ke );
|
||||
|
||||
bli_cntl_obj_free( trmm3m_l_cntl_op_bp );
|
||||
bli_cntl_obj_free( trmm3m_l_cntl_mm_op );
|
||||
bli_cntl_obj_free( trmm3m_l_cntl_vl_mm );
|
||||
bli_cntl_obj_free( trmm3m_r_cntl_op_bp );
|
||||
bli_cntl_obj_free( trmm3m_r_cntl_mm_op );
|
||||
bli_cntl_obj_free( trmm3m_r_cntl_vl_mm );
|
||||
}
|
||||
|
||||
@@ -1,258 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
|
||||
extern blksz_t* gemm4m_mc;
|
||||
extern blksz_t* gemm4m_nc;
|
||||
extern blksz_t* gemm4m_kc;
|
||||
extern blksz_t* gemm4m_mr;
|
||||
extern blksz_t* gemm4m_nr;
|
||||
extern blksz_t* gemm4m_kr;
|
||||
|
||||
extern func_t* gemm4m_ukrs;
|
||||
|
||||
extern gemm_t* gemm4m_cntl_bp_ke;
|
||||
|
||||
packm_t* trmm4m_l_packa_cntl;
|
||||
packm_t* trmm4m_l_packb_cntl;
|
||||
|
||||
packm_t* trmm4m_r_packa_cntl;
|
||||
packm_t* trmm4m_r_packb_cntl;
|
||||
|
||||
gemm_t* trmm4m_cntl_bp_ke;
|
||||
|
||||
gemm_t* trmm4m_l_cntl_op_bp;
|
||||
gemm_t* trmm4m_l_cntl_mm_op;
|
||||
gemm_t* trmm4m_l_cntl_vl_mm;
|
||||
|
||||
gemm_t* trmm4m_r_cntl_op_bp;
|
||||
gemm_t* trmm4m_r_cntl_mm_op;
|
||||
gemm_t* trmm4m_r_cntl_vl_mm;
|
||||
|
||||
gemm_t* trmm4m_l_cntl;
|
||||
gemm_t* trmm4m_r_cntl;
|
||||
|
||||
|
||||
void bli_trmm4m_cntl_init()
|
||||
{
|
||||
// Create control tree objects for packm operations (left side).
|
||||
trmm4m_l_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: for consistency with trsm, "k" dim
|
||||
// multiple is set to mr.
|
||||
gemm4m_mr,
|
||||
gemm4m_kr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_4M,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trmm4m_l_packb_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: m dim multiple here must be mr
|
||||
// since "k" dim multiple is set to mr above.
|
||||
gemm4m_kr,
|
||||
gemm4m_nr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_4M,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm operations (right side).
|
||||
trmm4m_r_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: for consistency with trsm, "k" dim
|
||||
// multiple is set to nr.
|
||||
gemm4m_mr,
|
||||
gemm4m_nr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_4M,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trmm4m_r_packb_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: m dim multiple here must be nr
|
||||
// since "k" dim multiple is set to nr above.
|
||||
gemm4m_nr,
|
||||
gemm4m_nr,
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_4M,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
trmm4m_cntl_bp_ke
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_UNB_OPT,
|
||||
BLIS_VARIANT2,
|
||||
NULL,
|
||||
gemm4m_ukrs,
|
||||
NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL );
|
||||
|
||||
// Create control tree object for outer panel (to block-panel)
|
||||
// problem (left side).
|
||||
trmm4m_l_cntl_op_bp
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemm4m_mc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
trmm4m_l_packa_cntl,
|
||||
trmm4m_l_packb_cntl,
|
||||
NULL,
|
||||
trmm4m_cntl_bp_ke,
|
||||
gemm4m_cntl_bp_ke,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for general problem via multiple
|
||||
// rank-k (outer panel) updates (left side).
|
||||
trmm4m_l_cntl_mm_op
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
gemm4m_kc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm4m_l_cntl_op_bp,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for very large problem via multiple
|
||||
// general problems (left side).
|
||||
trmm4m_l_cntl_vl_mm
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm4m_nc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm4m_l_cntl_mm_op,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for outer panel (to block-panel)
|
||||
// problem (right side).
|
||||
trmm4m_r_cntl_op_bp
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemm4m_mc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
trmm4m_r_packa_cntl,
|
||||
trmm4m_r_packb_cntl,
|
||||
NULL,
|
||||
trmm4m_cntl_bp_ke,
|
||||
gemm4m_cntl_bp_ke,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for general problem via multiple
|
||||
// rank-k (outer panel) updates (right side).
|
||||
trmm4m_r_cntl_mm_op
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
gemm4m_kc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm4m_r_cntl_op_bp,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for very large problem via multiple
|
||||
// general problems (right side).
|
||||
trmm4m_r_cntl_vl_mm
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
gemm4m_nc,
|
||||
gemm4m_ukrs,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm4m_r_cntl_mm_op,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
// Alias the "master" trmm control trees to shorter names.
|
||||
trmm4m_l_cntl = trmm4m_l_cntl_vl_mm;
|
||||
trmm4m_r_cntl = trmm4m_r_cntl_vl_mm;
|
||||
}
|
||||
|
||||
void bli_trmm4m_cntl_finalize()
|
||||
{
|
||||
bli_cntl_obj_free( trmm4m_l_packa_cntl );
|
||||
bli_cntl_obj_free( trmm4m_l_packb_cntl );
|
||||
bli_cntl_obj_free( trmm4m_r_packa_cntl );
|
||||
bli_cntl_obj_free( trmm4m_r_packb_cntl );
|
||||
|
||||
bli_cntl_obj_free( trmm4m_cntl_bp_ke );
|
||||
|
||||
bli_cntl_obj_free( trmm4m_l_cntl_op_bp );
|
||||
bli_cntl_obj_free( trmm4m_l_cntl_mm_op );
|
||||
bli_cntl_obj_free( trmm4m_l_cntl_vl_mm );
|
||||
bli_cntl_obj_free( trmm4m_r_cntl_op_bp );
|
||||
bli_cntl_obj_free( trmm4m_r_cntl_mm_op );
|
||||
bli_cntl_obj_free( trmm4m_r_cntl_vl_mm );
|
||||
}
|
||||
|
||||
@@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \
|
||||
needs to occur in units of real values. The value computed
|
||||
here is divided into the complex pointer offset to cause the
|
||||
pointer to be advanced by the correct value. */ \
|
||||
if ( bli_is_4m_packed( schema_a ) || \
|
||||
bli_is_3m_packed( schema_a ) || \
|
||||
if ( bli_is_4mi_packed( schema_a ) || \
|
||||
bli_is_3mi_packed( schema_a ) || \
|
||||
bli_is_rih_packed( schema_a ) ) off_scl = 2; \
|
||||
else off_scl = 1; \
|
||||
\
|
||||
@@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \
|
||||
summed-only, we need to scale the computed panel sizes by 1/2
|
||||
to compensate for the fact that the pointer arithmetic occurs
|
||||
in terms of complex elements rather than real elements. */ \
|
||||
if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
|
||||
if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
|
||||
else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \
|
||||
else { ss_a_num = 1; ss_a_den = 1; } \
|
||||
\
|
||||
|
||||
@@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \
|
||||
needs to occur in units of real values. The value computed
|
||||
here is divided into the complex pointer offset to cause the
|
||||
pointer to be advanced by the correct value. */ \
|
||||
if ( bli_is_4m_packed( schema_a ) || \
|
||||
bli_is_3m_packed( schema_a ) || \
|
||||
if ( bli_is_4mi_packed( schema_a ) || \
|
||||
bli_is_3mi_packed( schema_a ) || \
|
||||
bli_is_rih_packed( schema_a ) ) off_scl = 2; \
|
||||
else off_scl = 1; \
|
||||
\
|
||||
@@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \
|
||||
summed-only, we need to scale the computed panel sizes by 1/2
|
||||
to compensate for the fact that the pointer arithmetic occurs
|
||||
in terms of complex elements rather than real elements. */ \
|
||||
if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
|
||||
if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
|
||||
else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \
|
||||
else { ss_a_num = 1; ss_a_den = 1; } \
|
||||
\
|
||||
|
||||
@@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \
|
||||
needs to occur in units of real values. The value computed
|
||||
here is divided into the complex pointer offset to cause the
|
||||
pointer to be advanced by the correct value. */ \
|
||||
if ( bli_is_4m_packed( schema_b ) || \
|
||||
bli_is_3m_packed( schema_b ) || \
|
||||
if ( bli_is_4mi_packed( schema_b ) || \
|
||||
bli_is_3mi_packed( schema_b ) || \
|
||||
bli_is_rih_packed( schema_b ) ) off_scl = 2; \
|
||||
else off_scl = 1; \
|
||||
\
|
||||
@@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \
|
||||
summed-only, we need to scale the computed panel sizes by 1/2
|
||||
to compensate for the fact that the pointer arithmetic occurs
|
||||
in terms of complex elements rather than real elements. */ \
|
||||
if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
|
||||
if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
|
||||
else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \
|
||||
else { ss_b_num = 1; ss_b_den = 1; } \
|
||||
\
|
||||
|
||||
@@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \
|
||||
needs to occur in units of real values. The value computed
|
||||
here is divided into the complex pointer offset to cause the
|
||||
pointer to be advanced by the correct value. */ \
|
||||
if ( bli_is_4m_packed( schema_b ) || \
|
||||
bli_is_3m_packed( schema_b ) || \
|
||||
if ( bli_is_4mi_packed( schema_b ) || \
|
||||
bli_is_3mi_packed( schema_b ) || \
|
||||
bli_is_rih_packed( schema_b ) ) off_scl = 2; \
|
||||
else off_scl = 1; \
|
||||
\
|
||||
@@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \
|
||||
summed-only, we need to scale the computed panel sizes by 1/2
|
||||
to compensate for the fact that the pointer arithmetic occurs
|
||||
in terms of complex elements rather than real elements. */ \
|
||||
if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
|
||||
if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
|
||||
else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \
|
||||
else { ss_b_num = 1; ss_b_den = 1; } \
|
||||
\
|
||||
|
||||
@@ -120,7 +120,7 @@ void bli_trsm3m_cntl_init()
|
||||
TRUE, // invert diagonal
|
||||
TRUE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_3M,
|
||||
BLIS_PACKED_ROW_PANELS_3MI,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trsm3m_l_packb_cntl
|
||||
@@ -134,7 +134,7 @@ void bli_trsm3m_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_3M,
|
||||
BLIS_PACKED_COL_PANELS_3MI,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm operations (right side).
|
||||
@@ -147,7 +147,7 @@ void bli_trsm3m_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_3M,
|
||||
BLIS_PACKED_ROW_PANELS_3MI,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trsm3m_r_packb_cntl
|
||||
@@ -159,7 +159,7 @@ void bli_trsm3m_cntl_init()
|
||||
TRUE, // invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
TRUE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_3M,
|
||||
BLIS_PACKED_COL_PANELS_3MI,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
|
||||
@@ -120,7 +120,7 @@ void bli_trsm4m_cntl_init()
|
||||
TRUE, // invert diagonal
|
||||
TRUE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_4M,
|
||||
BLIS_PACKED_ROW_PANELS_4MI,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trsm4m_l_packb_cntl
|
||||
@@ -134,7 +134,7 @@ void bli_trsm4m_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_4M,
|
||||
BLIS_PACKED_COL_PANELS_4MI,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm operations (right side).
|
||||
@@ -147,7 +147,7 @@ void bli_trsm4m_cntl_init()
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS_4M,
|
||||
BLIS_PACKED_ROW_PANELS_4MI,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
trsm4m_r_packb_cntl
|
||||
@@ -159,7 +159,7 @@ void bli_trsm4m_cntl_init()
|
||||
TRUE, // invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
TRUE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS_4M,
|
||||
BLIS_PACKED_COL_PANELS_4MI,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
|
||||
|
||||
@@ -255,8 +255,8 @@ void PASTEMAC(ch,varname)( \
|
||||
needs to occur in units of real values. The value computed
|
||||
here is divided into the complex pointer offset to cause the
|
||||
pointer to be advanced by the correct value. */ \
|
||||
if ( bli_is_4m_packed( schema_a ) || \
|
||||
bli_is_3m_packed( schema_a ) || \
|
||||
if ( bli_is_4mi_packed( schema_a ) || \
|
||||
bli_is_3mi_packed( schema_a ) || \
|
||||
bli_is_rih_packed( schema_a ) ) off_scl = 2; \
|
||||
else off_scl = 1; \
|
||||
\
|
||||
@@ -266,8 +266,8 @@ void PASTEMAC(ch,varname)( \
|
||||
packing formats are not applicable here since trsm is a two-
|
||||
operand operation only (unlike trmm, which is capable of three-
|
||||
operand). */ \
|
||||
if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
|
||||
else { ss_a_num = 1; ss_a_den = 1; } \
|
||||
if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
|
||||
else { ss_a_num = 1; ss_a_den = 1; } \
|
||||
\
|
||||
/* If there is a zero region above where the diagonal of A intersects the
|
||||
left edge of the block, adjust the pointer to C and treat this case as
|
||||
|
||||
@@ -256,8 +256,8 @@ void PASTEMAC(ch,varname)( \
|
||||
needs to occur in units of real values. The value computed
|
||||
here is divided into the complex pointer offset to cause the
|
||||
pointer to be advanced by the correct value. */ \
|
||||
if ( bli_is_4m_packed( schema_a ) || \
|
||||
bli_is_3m_packed( schema_a ) || \
|
||||
if ( bli_is_4mi_packed( schema_a ) || \
|
||||
bli_is_3mi_packed( schema_a ) || \
|
||||
bli_is_rih_packed( schema_a ) ) off_scl = 2; \
|
||||
else off_scl = 1; \
|
||||
\
|
||||
@@ -267,8 +267,8 @@ void PASTEMAC(ch,varname)( \
|
||||
packing formats are not applicable here since trsm is a two-
|
||||
operand operation only (unlike trmm, which is capable of three-
|
||||
operand). */ \
|
||||
if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
|
||||
else { ss_a_num = 1; ss_a_den = 1; } \
|
||||
if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
|
||||
else { ss_a_num = 1; ss_a_den = 1; } \
|
||||
\
|
||||
/* If there is a zero region to the left of where the diagonal of A
|
||||
intersects the top edge of the block, adjust the pointer to B and
|
||||
|
||||
@@ -264,8 +264,8 @@ void PASTEMAC(ch,varname)( \
|
||||
needs to occur in units of real values. The value computed
|
||||
here is divided into the complex pointer offset to cause the
|
||||
pointer to be advanced by the correct value. */ \
|
||||
if ( bli_is_4m_packed( schema_b ) || \
|
||||
bli_is_3m_packed( schema_b ) || \
|
||||
if ( bli_is_4mi_packed( schema_b ) || \
|
||||
bli_is_3mi_packed( schema_b ) || \
|
||||
bli_is_rih_packed( schema_b ) ) off_scl = 2; \
|
||||
else off_scl = 1; \
|
||||
\
|
||||
@@ -275,8 +275,8 @@ void PASTEMAC(ch,varname)( \
|
||||
packing formats are not applicable here since trsm is a two-
|
||||
operand operation only (unlike trmm, which is capable of three-
|
||||
operand). */ \
|
||||
if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
|
||||
else { ss_b_num = 1; ss_b_den = 1; } \
|
||||
if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
|
||||
else { ss_b_num = 1; ss_b_den = 1; } \
|
||||
\
|
||||
/* If there is a zero region above where the diagonal of B intersects
|
||||
the left edge of the panel, adjust the pointer to A and treat this
|
||||
|
||||
@@ -263,8 +263,8 @@ void PASTEMAC(ch,varname)( \
|
||||
needs to occur in units of real values. The value computed
|
||||
here is divided into the complex pointer offset to cause the
|
||||
pointer to be advanced by the correct value. */ \
|
||||
if ( bli_is_4m_packed( schema_b ) || \
|
||||
bli_is_3m_packed( schema_b ) || \
|
||||
if ( bli_is_4mi_packed( schema_b ) || \
|
||||
bli_is_3mi_packed( schema_b ) || \
|
||||
bli_is_rih_packed( schema_b ) ) off_scl = 2; \
|
||||
else off_scl = 1; \
|
||||
\
|
||||
@@ -274,8 +274,8 @@ void PASTEMAC(ch,varname)( \
|
||||
packing formats are not applicable here since trsm is a two-
|
||||
operand operation only (unlike trmm, which is capable of three-
|
||||
operand). */ \
|
||||
if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
|
||||
else { ss_b_num = 1; ss_b_den = 1; } \
|
||||
if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
|
||||
else { ss_b_num = 1; ss_b_den = 1; } \
|
||||
\
|
||||
/* If there is a zero region to the left of where the diagonal of B
|
||||
intersects the top edge of the panel, adjust the pointer to C and
|
||||
|
||||
@@ -213,13 +213,13 @@
|
||||
\
|
||||
( ( (obj).info & BLIS_PACK_PANEL_BIT ) )
|
||||
|
||||
#define bli_obj_is_4m_packed( obj ) \
|
||||
#define bli_obj_is_4mi_packed( obj ) \
|
||||
\
|
||||
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4M )
|
||||
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4MI )
|
||||
|
||||
#define bli_obj_is_3m_packed( obj ) \
|
||||
#define bli_obj_is_3mi_packed( obj ) \
|
||||
\
|
||||
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3M )
|
||||
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3MI )
|
||||
|
||||
#define bli_obj_is_ro_packed( obj ) \
|
||||
\
|
||||
|
||||
@@ -521,13 +521,13 @@
|
||||
\
|
||||
( ( schema & BLIS_PACK_PANEL_BIT ) )
|
||||
|
||||
#define bli_is_4m_packed( schema ) \
|
||||
#define bli_is_4mi_packed( schema ) \
|
||||
\
|
||||
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4M )
|
||||
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4MI )
|
||||
|
||||
#define bli_is_3m_packed( schema ) \
|
||||
#define bli_is_3mi_packed( schema ) \
|
||||
\
|
||||
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3M )
|
||||
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3MI )
|
||||
|
||||
#define bli_is_ro_packed( schema ) \
|
||||
\
|
||||
|
||||
@@ -198,35 +198,35 @@ typedef dcomplex f77_dcomplex;
|
||||
- 13: domain (0 == real, 1 == complex)
|
||||
- 14: precision (0 == single, 1 == double)
|
||||
- 15: unused
|
||||
21 ~ 16 Packed type/status
|
||||
- 000000: not packed
|
||||
- 100000: packed (unspecified; by rows, columns, or vector)
|
||||
- 100000: packed by rows
|
||||
- 100001: packed by columns
|
||||
- 100010: packed by row panels
|
||||
- 100011: packed by column panels
|
||||
- 100110: packed by 4m row panels
|
||||
- 100111: packed by 4m column panels
|
||||
- 101010: packed by 3m row panels
|
||||
- 101011: packed by 3m column panels
|
||||
- 110010: packed real-only row panels
|
||||
- 110011: packed real-only column panels
|
||||
- 110110: packed imag-only row panels
|
||||
- 110111: packed imag-only column panels
|
||||
- 111010: packed real+imag row panels
|
||||
- 111011: packed real+imag column panels
|
||||
22 Packed panel order if upper-stored
|
||||
22 ~ 16 Packed type/status
|
||||
- 0 0000 00: not packed
|
||||
- 1 0000 00: packed (unspecified; by rows, columns, or vector)
|
||||
- 1 0000 00: packed by rows
|
||||
- 1 0000 01: packed by columns
|
||||
- 1 0000 10: packed by row panels
|
||||
- 1 0000 11: packed by column panels
|
||||
- 1 0001 10: packed by 4m interleaved row panels
|
||||
- 1 0001 11: packed by 4m interleaved column panels
|
||||
- 1 0010 10: packed by 3m interleaved row panels
|
||||
- 1 0010 11: packed by 3m interleaved column panels
|
||||
- 1 0101 10: packed real-only row panels
|
||||
- 1 0101 11: packed real-only column panels
|
||||
- 1 0110 10: packed imag-only row panels
|
||||
- 1 0110 11: packed imag-only column panels
|
||||
- 1 0111 10: packed real+imag row panels
|
||||
- 1 0111 11: packed real+imag column panels
|
||||
23 Packed panel order if upper-stored
|
||||
- 0 == forward order if upper
|
||||
- 1 == reverse order if upper
|
||||
23 Packed panel order if lower-stored
|
||||
24 Packed panel order if lower-stored
|
||||
- 0 == forward order if lower
|
||||
- 1 == reverse order if lower
|
||||
25 ~ 24 Packed buffer type
|
||||
26 ~ 25 Packed buffer type
|
||||
- 0 == block of A
|
||||
- 1 == panel of B
|
||||
- 2 == panel of C
|
||||
- 3 == general use
|
||||
27 ~ 26 Structure type
|
||||
28 ~ 27 Structure type
|
||||
- 0 == general
|
||||
- 1 == Hermitian
|
||||
- 2 == symmetric
|
||||
@@ -251,11 +251,11 @@ typedef dcomplex f77_dcomplex;
|
||||
#define BLIS_PACK_RC_SHIFT 16
|
||||
#define BLIS_PACK_PANEL_SHIFT 17
|
||||
#define BLIS_PACK_FORMAT_SHIFT 18
|
||||
#define BLIS_PACK_SHIFT 21
|
||||
#define BLIS_PACK_REV_IF_UPPER_SHIFT 22
|
||||
#define BLIS_PACK_REV_IF_LOWER_SHIFT 23
|
||||
#define BLIS_PACK_BUFFER_SHIFT 24
|
||||
#define BLIS_STRUC_SHIFT 26
|
||||
#define BLIS_PACK_SHIFT 22
|
||||
#define BLIS_PACK_REV_IF_UPPER_SHIFT 23
|
||||
#define BLIS_PACK_REV_IF_LOWER_SHIFT 24
|
||||
#define BLIS_PACK_BUFFER_SHIFT 25
|
||||
#define BLIS_STRUC_SHIFT 27
|
||||
|
||||
//
|
||||
// -- BLIS info bit field masks ------------------------------------------------
|
||||
@@ -275,10 +275,10 @@ typedef dcomplex f77_dcomplex;
|
||||
#define BLIS_INVERT_DIAG_BIT ( 0x1 << BLIS_INVERT_DIAG_SHIFT )
|
||||
#define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT )
|
||||
#define BLIS_EXECUTION_DT_BITS ( 0x7 << BLIS_EXECUTION_DT_SHIFT )
|
||||
#define BLIS_PACK_SCHEMA_BITS ( 0x3F << BLIS_PACK_SCHEMA_SHIFT )
|
||||
#define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT )
|
||||
#define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT )
|
||||
#define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT )
|
||||
#define BLIS_PACK_FORMAT_BITS ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_PACK_FORMAT_BITS ( 0xF << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_PACK_BIT ( 0x1 << BLIS_PACK_SHIFT )
|
||||
#define BLIS_PACK_REV_IF_UPPER_BIT ( 0x1 << BLIS_PACK_REV_IF_UPPER_SHIFT )
|
||||
#define BLIS_PACK_REV_IF_LOWER_BIT ( 0x1 << BLIS_PACK_REV_IF_LOWER_SHIFT )
|
||||
@@ -290,61 +290,61 @@ typedef dcomplex f77_dcomplex;
|
||||
// -- BLIS enumerated type value definitions -----------------------------------
|
||||
//
|
||||
|
||||
#define BLIS_BITVAL_REAL 0x0
|
||||
#define BLIS_BITVAL_COMPLEX BLIS_DOMAIN_BIT
|
||||
#define BLIS_BITVAL_SINGLE_PREC 0x0
|
||||
#define BLIS_BITVAL_DOUBLE_PREC BLIS_PRECISION_BIT
|
||||
#define BLIS_BITVAL_FLOAT_TYPE 0x0
|
||||
#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT
|
||||
#define BLIS_BITVAL_DOUBLE_TYPE BLIS_PRECISION_BIT
|
||||
#define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT )
|
||||
#define BLIS_BITVAL_INT_TYPE 0x04
|
||||
#define BLIS_BITVAL_CONST_TYPE 0x05
|
||||
#define BLIS_BITVAL_NO_TRANS 0x0
|
||||
#define BLIS_BITVAL_TRANS BLIS_TRANS_BIT
|
||||
#define BLIS_BITVAL_NO_CONJ 0x0
|
||||
#define BLIS_BITVAL_CONJ BLIS_CONJ_BIT
|
||||
#define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT )
|
||||
#define BLIS_BITVAL_ZEROS 0x0
|
||||
#define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT )
|
||||
#define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT )
|
||||
#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS
|
||||
#define BLIS_BITVAL_NONUNIT_DIAG 0x0
|
||||
#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT
|
||||
#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT
|
||||
#define BLIS_BITVAL_NOT_PACKED 0x0
|
||||
#define BLIS_BITVAL_4M ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_3M ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_RO ( 0x5 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_IO ( 0x6 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_RPI ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_PACKED_UNSPEC BLIS_PACK_BIT
|
||||
#define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_4M ( BLIS_PACK_BIT | BLIS_BITVAL_4M | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_4M ( BLIS_PACK_BIT | BLIS_BITVAL_4M | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_3M ( BLIS_PACK_BIT | BLIS_BITVAL_3M | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_3M ( BLIS_PACK_BIT | BLIS_BITVAL_3M | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_REAL 0x0
|
||||
#define BLIS_BITVAL_COMPLEX BLIS_DOMAIN_BIT
|
||||
#define BLIS_BITVAL_SINGLE_PREC 0x0
|
||||
#define BLIS_BITVAL_DOUBLE_PREC BLIS_PRECISION_BIT
|
||||
#define BLIS_BITVAL_FLOAT_TYPE 0x0
|
||||
#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT
|
||||
#define BLIS_BITVAL_DOUBLE_TYPE BLIS_PRECISION_BIT
|
||||
#define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT )
|
||||
#define BLIS_BITVAL_INT_TYPE 0x04
|
||||
#define BLIS_BITVAL_CONST_TYPE 0x05
|
||||
#define BLIS_BITVAL_NO_TRANS 0x0
|
||||
#define BLIS_BITVAL_TRANS BLIS_TRANS_BIT
|
||||
#define BLIS_BITVAL_NO_CONJ 0x0
|
||||
#define BLIS_BITVAL_CONJ BLIS_CONJ_BIT
|
||||
#define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT )
|
||||
#define BLIS_BITVAL_ZEROS 0x0
|
||||
#define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT )
|
||||
#define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT )
|
||||
#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS
|
||||
#define BLIS_BITVAL_NONUNIT_DIAG 0x0
|
||||
#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT
|
||||
#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT
|
||||
#define BLIS_BITVAL_NOT_PACKED 0x0
|
||||
#define BLIS_BITVAL_4MI ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_3MI ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_RO ( 0x5 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_IO ( 0x6 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_RPI ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_PACKED_UNSPEC BLIS_PACK_BIT
|
||||
#define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
|
||||
#define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT
|
||||
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
|
||||
#define BLIS_BITVAL_PACK_REV_IF_LOWER BLIS_PACK_REV_IF_LOWER_BIT
|
||||
#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK 0x0
|
||||
#define BLIS_BITVAL_BUFFER_FOR_B_PANEL ( 0x1 << BLIS_PACK_BUFFER_SHIFT )
|
||||
#define BLIS_BITVAL_BUFFER_FOR_C_PANEL ( 0x2 << BLIS_PACK_BUFFER_SHIFT )
|
||||
#define BLIS_BITVAL_BUFFER_FOR_GEN_USE ( 0x3 << BLIS_PACK_BUFFER_SHIFT )
|
||||
#define BLIS_BITVAL_GENERAL 0x0
|
||||
#define BLIS_BITVAL_HERMITIAN ( 0x1 << BLIS_STRUC_SHIFT )
|
||||
#define BLIS_BITVAL_SYMMETRIC ( 0x2 << BLIS_STRUC_SHIFT )
|
||||
#define BLIS_BITVAL_TRIANGULAR ( 0x3 << BLIS_STRUC_SHIFT )
|
||||
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
|
||||
#define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT
|
||||
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
|
||||
#define BLIS_BITVAL_PACK_REV_IF_LOWER BLIS_PACK_REV_IF_LOWER_BIT
|
||||
#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK 0x0
|
||||
#define BLIS_BITVAL_BUFFER_FOR_B_PANEL ( 0x1 << BLIS_PACK_BUFFER_SHIFT )
|
||||
#define BLIS_BITVAL_BUFFER_FOR_C_PANEL ( 0x2 << BLIS_PACK_BUFFER_SHIFT )
|
||||
#define BLIS_BITVAL_BUFFER_FOR_GEN_USE ( 0x3 << BLIS_PACK_BUFFER_SHIFT )
|
||||
#define BLIS_BITVAL_GENERAL 0x0
|
||||
#define BLIS_BITVAL_HERMITIAN ( 0x1 << BLIS_STRUC_SHIFT )
|
||||
#define BLIS_BITVAL_SYMMETRIC ( 0x2 << BLIS_STRUC_SHIFT )
|
||||
#define BLIS_BITVAL_TRIANGULAR ( 0x3 << BLIS_STRUC_SHIFT )
|
||||
|
||||
|
||||
//
|
||||
@@ -431,21 +431,21 @@ typedef enum
|
||||
|
||||
typedef enum
|
||||
{
|
||||
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
|
||||
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
|
||||
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
|
||||
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
|
||||
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
|
||||
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
|
||||
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
|
||||
BLIS_PACKED_ROW_PANELS_4M = BLIS_BITVAL_PACKED_ROW_PANELS_4M,
|
||||
BLIS_PACKED_COL_PANELS_4M = BLIS_BITVAL_PACKED_COL_PANELS_4M,
|
||||
BLIS_PACKED_ROW_PANELS_3M = BLIS_BITVAL_PACKED_ROW_PANELS_3M,
|
||||
BLIS_PACKED_COL_PANELS_3M = BLIS_BITVAL_PACKED_COL_PANELS_3M,
|
||||
BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
|
||||
BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO,
|
||||
BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
|
||||
BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO,
|
||||
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
|
||||
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
|
||||
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
|
||||
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
|
||||
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
|
||||
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
|
||||
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
|
||||
BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI,
|
||||
BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI,
|
||||
BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI,
|
||||
BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI,
|
||||
BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
|
||||
BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO,
|
||||
BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
|
||||
BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO,
|
||||
BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI,
|
||||
BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI,
|
||||
} pack_t;
|
||||
|
||||
Reference in New Issue
Block a user