Renamed 3m and 4m symbols/macros to 3mi and 4mi.

Details:
- Renamed several variables and macros from 3m/4m to 3mi/4mi. This is
  because those packing schemas were always implicitly "interleaved".
  This new naming scheme will make way for new schemas that separate
  instead of interleave the real and imaginary (and summed) parts.
- Expanded the pack format sub-field of the pack schema field of the
  info_t to 4 bits (from 3). This will allow for more schema types
  going forward.
- Removed old _cntl.c files for herk3m, herk4m, trmm3m, trmm4m.
This commit is contained in:
Field G. Van Zee
2015-02-19 17:06:10 -06:00
parent af32e3a608
commit 441d47542a
22 changed files with 151 additions and 979 deletions

View File

@@ -152,8 +152,8 @@ void bli_packm_blk_var2( obj_t* c,
// Choose the correct func_t object based on the pack_t schema.
if ( bli_is_4m_packed( schema ) ) packm_kers = packm_struc_cxk_4m_kers;
else if ( bli_is_3m_packed( schema ) ) packm_kers = packm_struc_cxk_3m_kers;
if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4m_kers;
else if ( bli_is_3mi_packed( schema ) ) packm_kers = packm_struc_cxk_3m_kers;
else if ( bli_is_ro_packed( schema ) ||
bli_is_io_packed( schema ) ||
bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers;
@@ -330,7 +330,7 @@ void PASTEMAC(ch,varname)( \
1/2. In both cases, we are compensating for the fact that pointer
arithmetic occurs in terms of complex elements rather than real
elements. */ \
if ( bli_is_3m_packed( schema ) ) { ss_num = 3; ss_den = 2; } \
if ( bli_is_3mi_packed( schema ) ) { ss_num = 3; ss_den = 2; } \
else if ( bli_is_rih_packed( schema ) ) { ss_num = 1; ss_den = 2; } \
else { ss_num = 1; ss_den = 1; } \
\

View File

@@ -362,7 +362,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
// Why? Because the macro-kernel indexes in units of the complex
// datatype. So these changes "trick" it into indexing the correct
// amount.
if ( bli_is_3m_packed( pack_schema ) )
if ( bli_is_3mi_packed( pack_schema ) )
{
ps_p = ( ps_p * 3 ) / 2;
@@ -460,7 +460,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
// Why? Because the macro-kernel indexes in units of the complex
// datatype. So these changes "trick" it into indexing the correct
// amount.
if ( bli_is_3m_packed( pack_schema ) )
if ( bli_is_3mi_packed( pack_schema ) )
{
ps_p = ( ps_p * 3 ) / 2;

View File

@@ -134,7 +134,7 @@ void bli_gemm3m_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_3M,
BLIS_PACKED_ROW_PANELS_3MI,
BLIS_BUFFER_FOR_A_BLOCK );
gemm3m_packb_cntl
@@ -146,7 +146,7 @@ void bli_gemm3m_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_3M,
BLIS_PACKED_COL_PANELS_3MI,
BLIS_BUFFER_FOR_B_PANEL );

View File

@@ -131,7 +131,7 @@ void bli_gemm4m_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_4M,
BLIS_PACKED_ROW_PANELS_4MI,
BLIS_BUFFER_FOR_A_BLOCK );
gemm4m_packb_cntl
@@ -143,7 +143,7 @@ void bli_gemm4m_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_4M,
BLIS_PACKED_COL_PANELS_4MI,
BLIS_BUFFER_FOR_B_PANEL );

View File

@@ -125,7 +125,7 @@ void bli_gemm4mb_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_4M,
BLIS_PACKED_ROW_PANELS_4MI,
BLIS_BUFFER_FOR_A_BLOCK );
gemm4mb_packb_cntl
@@ -137,7 +137,7 @@ void bli_gemm4mb_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_4M,
BLIS_PACKED_COL_PANELS_4MI,
BLIS_BUFFER_FOR_B_PANEL );

View File

@@ -1,156 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Objects defined in other translation units. The functions in this file
// reference the gemm3m blocksize objects and gemm3m_ukrs; scalm_cntl is
// declared here but is not referenced by either function below.
extern scalm_t* scalm_cntl;
extern blksz_t* gemm3m_mc;
extern blksz_t* gemm3m_nc;
extern blksz_t* gemm3m_kc;
extern blksz_t* gemm3m_mr;
extern blksz_t* gemm3m_nr;
extern blksz_t* gemm3m_kr;
extern func_t* gemm3m_ukrs;
// Packm control tree nodes, assigned in bli_herk3m_cntl_init().
packm_t* herk3m_packa_cntl;
packm_t* herk3m_packb_cntl;
// Herk control tree nodes, assigned in bli_herk3m_cntl_init().
herk_t* herk3m_cntl_bp_ke;
herk_t* herk3m_cntl_op_bp;
herk_t* herk3m_cntl_mm_op;
herk_t* herk3m_cntl_vl_mm;
// Alias of herk3m_cntl_vl_mm (set at the end of bli_herk3m_cntl_init());
// it is not a separately created object.
herk_t* herk3m_cntl;
/*
  Builds the global control tree used by the 3m herk implementation.

  Two packm nodes are created first (herk3m_packa_cntl and
  herk3m_packb_cntl), followed by four herk nodes. The herk nodes are
  created bottom-up: every node after herk3m_cntl_bp_ke is passed a
  previously created node as an argument, so the order of the statements
  below must be preserved. Finally, herk3m_cntl is pointed at the
  outermost node, herk3m_cntl_vl_mm.

  All blocksizes and the micro-kernel func_t are taken from the extern
  gemm3m objects.
  NOTE(review): this presumably requires the gemm3m control tree to have
  been initialized before this function is called -- confirm against the
  caller.
*/
void bli_herk3m_cntl_init()
{
// Create control tree objects for packm operations.
// The A node uses gemm3m_mr/gemm3m_kr with the row-panel 3M schema and the
// A-block buffer type.
herk3m_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm3m_mr,
gemm3m_kr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_3M,
BLIS_BUFFER_FOR_A_BLOCK );
// The B node uses gemm3m_kr/gemm3m_nr with the column-panel 3M schema and
// the B-panel buffer type.
herk3m_packb_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm3m_kr,
gemm3m_nr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_3M,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree object for lowest-level block-panel kernel.
// This leaf node carries no blocksize and no sub-nodes (all NULL).
herk3m_cntl_bp_ke
=
bli_herk_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL,
gemm3m_ukrs,
NULL, NULL, NULL,
NULL, NULL, NULL );
// Create control tree object for outer panel (to block-panel)
// problem.
// This is the only herk node that is given the two packm nodes.
herk3m_cntl_op_bp
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemm3m_mc,
gemm3m_ukrs,
NULL,
herk3m_packa_cntl,
herk3m_packb_cntl,
NULL,
herk3m_cntl_bp_ke,
NULL );
// Create control tree object for general problem via multiple
// rank-k (outer panel) updates.
herk3m_cntl_mm_op
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
gemm3m_kc,
gemm3m_ukrs,
NULL,
NULL,
NULL,
NULL,
herk3m_cntl_op_bp,
NULL );
// Create control tree object for very large problem via multiple
// general problems.
herk3m_cntl_vl_mm
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm3m_nc,
gemm3m_ukrs,
NULL,
NULL,
NULL,
NULL,
herk3m_cntl_mm_op,
NULL );
// Alias the "master" herk control tree to a shorter name.
herk3m_cntl = herk3m_cntl_vl_mm;
}
/*
  Releases every control tree node created by bli_herk3m_cntl_init().

  herk3m_cntl is only an alias of herk3m_cntl_vl_mm, so it is not freed
  separately. The global pointers themselves are left unchanged.
*/
void bli_herk3m_cntl_finalize()
{
    // Nodes are listed (and released) in the order in which they were
    // created: the two packm nodes first, then the herk nodes bottom-up.
    void* nodes[] =
    {
        herk3m_packa_cntl,
        herk3m_packb_cntl,
        herk3m_cntl_bp_ke,
        herk3m_cntl_op_bp,
        herk3m_cntl_mm_op,
        herk3m_cntl_vl_mm
    };
    const int n_nodes = ( int )( sizeof( nodes ) / sizeof( nodes[0] ) );
    int       i;

    for ( i = 0; i < n_nodes; ++i )
    {
        bli_cntl_obj_free( nodes[i] );
    }
}

View File

@@ -1,156 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Objects defined in other translation units. The functions in this file
// reference the gemm4m blocksize objects and gemm4m_ukrs; scalm_cntl is
// declared here but is not referenced by either function below.
extern scalm_t* scalm_cntl;
extern blksz_t* gemm4m_mc;
extern blksz_t* gemm4m_nc;
extern blksz_t* gemm4m_kc;
extern blksz_t* gemm4m_mr;
extern blksz_t* gemm4m_nr;
extern blksz_t* gemm4m_kr;
extern func_t* gemm4m_ukrs;
// Packm control tree nodes, assigned in bli_herk4m_cntl_init().
packm_t* herk4m_packa_cntl;
packm_t* herk4m_packb_cntl;
// Herk control tree nodes, assigned in bli_herk4m_cntl_init().
herk_t* herk4m_cntl_bp_ke;
herk_t* herk4m_cntl_op_bp;
herk_t* herk4m_cntl_mm_op;
herk_t* herk4m_cntl_vl_mm;
// Alias of herk4m_cntl_vl_mm (set at the end of bli_herk4m_cntl_init());
// it is not a separately created object.
herk_t* herk4m_cntl;
/*
  Builds the global control tree used by the 4m herk implementation.

  Two packm nodes are created first (herk4m_packa_cntl and
  herk4m_packb_cntl), followed by four herk nodes. The herk nodes are
  created bottom-up: every node after herk4m_cntl_bp_ke is passed a
  previously created node as an argument, so the order of the statements
  below must be preserved. Finally, herk4m_cntl is pointed at the
  outermost node, herk4m_cntl_vl_mm.

  All blocksizes and the micro-kernel func_t are taken from the extern
  gemm4m objects.
  NOTE(review): this presumably requires the gemm4m control tree to have
  been initialized before this function is called -- confirm against the
  caller.
*/
void bli_herk4m_cntl_init()
{
// Create control tree objects for packm operations.
// The A node uses gemm4m_mr/gemm4m_kr with the row-panel 4M schema and the
// A-block buffer type.
herk4m_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm4m_mr,
gemm4m_kr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_4M,
BLIS_BUFFER_FOR_A_BLOCK );
// The B node uses gemm4m_kr/gemm4m_nr with the column-panel 4M schema and
// the B-panel buffer type.
herk4m_packb_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm4m_kr,
gemm4m_nr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_4M,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree object for lowest-level block-panel kernel.
// This leaf node carries no blocksize and no sub-nodes (all NULL).
herk4m_cntl_bp_ke
=
bli_herk_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL,
gemm4m_ukrs,
NULL, NULL, NULL,
NULL, NULL, NULL );
// Create control tree object for outer panel (to block-panel)
// problem.
// This is the only herk node that is given the two packm nodes.
herk4m_cntl_op_bp
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemm4m_mc,
gemm4m_ukrs,
NULL,
herk4m_packa_cntl,
herk4m_packb_cntl,
NULL,
herk4m_cntl_bp_ke,
NULL );
// Create control tree object for general problem via multiple
// rank-k (outer panel) updates.
herk4m_cntl_mm_op
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
gemm4m_kc,
gemm4m_ukrs,
NULL,
NULL,
NULL,
NULL,
herk4m_cntl_op_bp,
NULL );
// Create control tree object for very large problem via multiple
// general problems.
herk4m_cntl_vl_mm
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm4m_nc,
gemm4m_ukrs,
NULL,
NULL,
NULL,
NULL,
herk4m_cntl_mm_op,
NULL );
// Alias the "master" herk control tree to a shorter name.
herk4m_cntl = herk4m_cntl_vl_mm;
}
/*
  Releases every control tree node created by bli_herk4m_cntl_init().

  herk4m_cntl is only an alias of herk4m_cntl_vl_mm, so it is not freed
  separately. The global pointers themselves are left unchanged.
*/
void bli_herk4m_cntl_finalize()
{
    // Nodes are listed (and released) in the order in which they were
    // created: the two packm nodes first, then the herk nodes bottom-up.
    void* nodes[] =
    {
        herk4m_packa_cntl,
        herk4m_packb_cntl,
        herk4m_cntl_bp_ke,
        herk4m_cntl_op_bp,
        herk4m_cntl_mm_op,
        herk4m_cntl_vl_mm
    };
    const int n_nodes = ( int )( sizeof( nodes ) / sizeof( nodes[0] ) );
    int       i;

    for ( i = 0; i < n_nodes; ++i )
    {
        bli_cntl_obj_free( nodes[i] );
    }
}

View File

@@ -1,258 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Objects defined in other translation units. The functions in this file
// reference the gemm3m blocksize objects, gemm3m_ukrs and
// gemm3m_cntl_bp_ke; scalm_cntl is declared here but is not referenced by
// either function below.
extern scalm_t* scalm_cntl;
extern blksz_t* gemm3m_mc;
extern blksz_t* gemm3m_nc;
extern blksz_t* gemm3m_kc;
extern blksz_t* gemm3m_mr;
extern blksz_t* gemm3m_nr;
extern blksz_t* gemm3m_kr;
extern func_t* gemm3m_ukrs;
extern gemm_t* gemm3m_cntl_bp_ke;
// Packm control tree nodes for the left- and right-side cases, assigned in
// bli_trmm3m_cntl_init().
packm_t* trmm3m_l_packa_cntl;
packm_t* trmm3m_l_packb_cntl;
packm_t* trmm3m_r_packa_cntl;
packm_t* trmm3m_r_packb_cntl;
// Trmm control tree nodes (stored as gemm_t*), assigned in
// bli_trmm3m_cntl_init(). The bp_ke leaf is shared by both sides.
gemm_t* trmm3m_cntl_bp_ke;
gemm_t* trmm3m_l_cntl_op_bp;
gemm_t* trmm3m_l_cntl_mm_op;
gemm_t* trmm3m_l_cntl_vl_mm;
gemm_t* trmm3m_r_cntl_op_bp;
gemm_t* trmm3m_r_cntl_mm_op;
gemm_t* trmm3m_r_cntl_vl_mm;
// Aliases of trmm3m_l_cntl_vl_mm and trmm3m_r_cntl_vl_mm respectively; they
// are not separately created objects.
gemm_t* trmm3m_l_cntl;
gemm_t* trmm3m_r_cntl;
/*
  Builds the global control trees used by the 3m trmm implementation, one
  for the left-side case and one for the right-side case.

  Four packm nodes are created first (A and B for each side), then a
  single leaf node (trmm3m_cntl_bp_ke) that both sides share, and then
  three trmm nodes per side. The nodes are created bottom-up: each
  higher-level node is passed a previously created node as an argument,
  so the order of the statements below must be preserved. The op_bp nodes
  additionally receive the extern gemm3m_cntl_bp_ke. Finally,
  trmm3m_l_cntl and trmm3m_r_cntl are pointed at the outermost node of
  each side.

  All blocksizes and the micro-kernel func_t are taken from the extern
  gemm3m objects.
  NOTE(review): this presumably requires the gemm3m control tree to have
  been initialized before this function is called -- confirm against the
  caller.
*/
void bli_trmm3m_cntl_init()
{
// Create control tree objects for packm operations (left side).
trmm3m_l_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: for consistency with trsm, "k" dim
// multiple is set to mr.
// NOTE(review): the second blocksize argument below is gemm3m_kr,
// not gemm3m_mr, so the comment above may be stale -- confirm.
gemm3m_mr,
gemm3m_kr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_3M,
BLIS_BUFFER_FOR_A_BLOCK );
trmm3m_l_packb_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: m dim multiple here must be mr
// since "k" dim multiple is set to mr above.
// NOTE(review): the first blocksize argument below is gemm3m_kr,
// not gemm3m_mr, so the comment above may be stale -- confirm.
gemm3m_kr,
gemm3m_nr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_3M,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm operations (right side).
trmm3m_r_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: for consistency with trsm, "k" dim
// multiple is set to nr.
gemm3m_mr,
gemm3m_nr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_3M,
BLIS_BUFFER_FOR_A_BLOCK );
trmm3m_r_packb_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: m dim multiple here must be nr
// since "k" dim multiple is set to nr above.
gemm3m_nr,
gemm3m_nr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_3M,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree object for lowest-level block-panel kernel.
// This leaf node is shared by the left- and right-side trees.
trmm3m_cntl_bp_ke
=
bli_trmm_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL,
gemm3m_ukrs,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL );
// Create control tree object for outer panel (to block-panel)
// problem (left side).
// Receives the left-side packm nodes, the shared trmm leaf, and the
// extern gemm3m leaf.
trmm3m_l_cntl_op_bp
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemm3m_mc,
gemm3m_ukrs,
NULL,
trmm3m_l_packa_cntl,
trmm3m_l_packb_cntl,
NULL,
trmm3m_cntl_bp_ke,
gemm3m_cntl_bp_ke,
NULL );
// Create control tree object for general problem via multiple
// rank-k (outer panel) updates (left side).
trmm3m_l_cntl_mm_op
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
gemm3m_kc,
gemm3m_ukrs,
NULL,
NULL,
NULL,
NULL,
trmm3m_l_cntl_op_bp,
NULL,
NULL );
// Create control tree object for very large problem via multiple
// general problems (left side).
trmm3m_l_cntl_vl_mm
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm3m_nc,
gemm3m_ukrs,
NULL,
NULL,
NULL,
NULL,
trmm3m_l_cntl_mm_op,
NULL,
NULL );
// Create control tree object for outer panel (to block-panel)
// problem (right side).
// Receives the right-side packm nodes, the shared trmm leaf, and the
// extern gemm3m leaf.
trmm3m_r_cntl_op_bp
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemm3m_mc,
gemm3m_ukrs,
NULL,
trmm3m_r_packa_cntl,
trmm3m_r_packb_cntl,
NULL,
trmm3m_cntl_bp_ke,
gemm3m_cntl_bp_ke,
NULL );
// Create control tree object for general problem via multiple
// rank-k (outer panel) updates (right side).
trmm3m_r_cntl_mm_op
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
gemm3m_kc,
gemm3m_ukrs,
NULL,
NULL,
NULL,
NULL,
trmm3m_r_cntl_op_bp,
NULL,
NULL );
// Create control tree object for very large problem via multiple
// general problems (right side).
trmm3m_r_cntl_vl_mm
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm3m_nc,
gemm3m_ukrs,
NULL,
NULL,
NULL,
NULL,
trmm3m_r_cntl_mm_op,
NULL,
NULL );
// Alias the "master" trmm control trees to shorter names.
trmm3m_l_cntl = trmm3m_l_cntl_vl_mm;
trmm3m_r_cntl = trmm3m_r_cntl_vl_mm;
}
/*
  Releases every control tree node created by bli_trmm3m_cntl_init().

  trmm3m_l_cntl and trmm3m_r_cntl are only aliases of the respective
  vl_mm nodes, so they are not freed separately. gemm3m_cntl_bp_ke is
  merely referenced by the trmm trees and is not freed here. The global
  pointers themselves are left unchanged.
*/
void bli_trmm3m_cntl_finalize()
{
    // Release order: the four packm nodes, the shared leaf, then the
    // left-side nodes followed by the right-side nodes.
    void* nodes[] =
    {
        trmm3m_l_packa_cntl,
        trmm3m_l_packb_cntl,
        trmm3m_r_packa_cntl,
        trmm3m_r_packb_cntl,
        trmm3m_cntl_bp_ke,
        trmm3m_l_cntl_op_bp,
        trmm3m_l_cntl_mm_op,
        trmm3m_l_cntl_vl_mm,
        trmm3m_r_cntl_op_bp,
        trmm3m_r_cntl_mm_op,
        trmm3m_r_cntl_vl_mm
    };
    const int n_nodes = ( int )( sizeof( nodes ) / sizeof( nodes[0] ) );
    int       i;

    for ( i = 0; i < n_nodes; ++i )
    {
        bli_cntl_obj_free( nodes[i] );
    }
}

View File

@@ -1,258 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Objects defined in other translation units. The functions in this file
// reference the gemm4m blocksize objects, gemm4m_ukrs and
// gemm4m_cntl_bp_ke; scalm_cntl is declared here but is not referenced by
// either function below.
extern scalm_t* scalm_cntl;
extern blksz_t* gemm4m_mc;
extern blksz_t* gemm4m_nc;
extern blksz_t* gemm4m_kc;
extern blksz_t* gemm4m_mr;
extern blksz_t* gemm4m_nr;
extern blksz_t* gemm4m_kr;
extern func_t* gemm4m_ukrs;
extern gemm_t* gemm4m_cntl_bp_ke;
// Packm control tree nodes for the left- and right-side cases, assigned in
// bli_trmm4m_cntl_init().
packm_t* trmm4m_l_packa_cntl;
packm_t* trmm4m_l_packb_cntl;
packm_t* trmm4m_r_packa_cntl;
packm_t* trmm4m_r_packb_cntl;
// Trmm control tree nodes (stored as gemm_t*), assigned in
// bli_trmm4m_cntl_init(). The bp_ke leaf is shared by both sides.
gemm_t* trmm4m_cntl_bp_ke;
gemm_t* trmm4m_l_cntl_op_bp;
gemm_t* trmm4m_l_cntl_mm_op;
gemm_t* trmm4m_l_cntl_vl_mm;
gemm_t* trmm4m_r_cntl_op_bp;
gemm_t* trmm4m_r_cntl_mm_op;
gemm_t* trmm4m_r_cntl_vl_mm;
// Aliases of trmm4m_l_cntl_vl_mm and trmm4m_r_cntl_vl_mm respectively; they
// are not separately created objects.
gemm_t* trmm4m_l_cntl;
gemm_t* trmm4m_r_cntl;
/*
  Builds the global control trees used by the 4m trmm implementation, one
  for the left-side case and one for the right-side case.

  Four packm nodes are created first (A and B for each side), then a
  single leaf node (trmm4m_cntl_bp_ke) that both sides share, and then
  three trmm nodes per side. The nodes are created bottom-up: each
  higher-level node is passed a previously created node as an argument,
  so the order of the statements below must be preserved. The op_bp nodes
  additionally receive the extern gemm4m_cntl_bp_ke. Finally,
  trmm4m_l_cntl and trmm4m_r_cntl are pointed at the outermost node of
  each side.

  All blocksizes and the micro-kernel func_t are taken from the extern
  gemm4m objects.
  NOTE(review): this presumably requires the gemm4m control tree to have
  been initialized before this function is called -- confirm against the
  caller.
*/
void bli_trmm4m_cntl_init()
{
// Create control tree objects for packm operations (left side).
trmm4m_l_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: for consistency with trsm, "k" dim
// multiple is set to mr.
// NOTE(review): the second blocksize argument below is gemm4m_kr,
// not gemm4m_mr, so the comment above may be stale -- confirm.
gemm4m_mr,
gemm4m_kr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_4M,
BLIS_BUFFER_FOR_A_BLOCK );
trmm4m_l_packb_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: m dim multiple here must be mr
// since "k" dim multiple is set to mr above.
// NOTE(review): the first blocksize argument below is gemm4m_kr,
// not gemm4m_mr, so the comment above may be stale -- confirm.
gemm4m_kr,
gemm4m_nr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_4M,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm operations (right side).
trmm4m_r_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: for consistency with trsm, "k" dim
// multiple is set to nr.
gemm4m_mr,
gemm4m_nr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_4M,
BLIS_BUFFER_FOR_A_BLOCK );
trmm4m_r_packb_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
// IMPORTANT: m dim multiple here must be nr
// since "k" dim multiple is set to nr above.
gemm4m_nr,
gemm4m_nr,
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_4M,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree object for lowest-level block-panel kernel.
// This leaf node is shared by the left- and right-side trees.
trmm4m_cntl_bp_ke
=
bli_trmm_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL,
gemm4m_ukrs,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL );
// Create control tree object for outer panel (to block-panel)
// problem (left side).
// Receives the left-side packm nodes, the shared trmm leaf, and the
// extern gemm4m leaf.
trmm4m_l_cntl_op_bp
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemm4m_mc,
gemm4m_ukrs,
NULL,
trmm4m_l_packa_cntl,
trmm4m_l_packb_cntl,
NULL,
trmm4m_cntl_bp_ke,
gemm4m_cntl_bp_ke,
NULL );
// Create control tree object for general problem via multiple
// rank-k (outer panel) updates (left side).
trmm4m_l_cntl_mm_op
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
gemm4m_kc,
gemm4m_ukrs,
NULL,
NULL,
NULL,
NULL,
trmm4m_l_cntl_op_bp,
NULL,
NULL );
// Create control tree object for very large problem via multiple
// general problems (left side).
trmm4m_l_cntl_vl_mm
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm4m_nc,
gemm4m_ukrs,
NULL,
NULL,
NULL,
NULL,
trmm4m_l_cntl_mm_op,
NULL,
NULL );
// Create control tree object for outer panel (to block-panel)
// problem (right side).
// Receives the right-side packm nodes, the shared trmm leaf, and the
// extern gemm4m leaf.
trmm4m_r_cntl_op_bp
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
gemm4m_mc,
gemm4m_ukrs,
NULL,
trmm4m_r_packa_cntl,
trmm4m_r_packb_cntl,
NULL,
trmm4m_cntl_bp_ke,
gemm4m_cntl_bp_ke,
NULL );
// Create control tree object for general problem via multiple
// rank-k (outer panel) updates (right side).
trmm4m_r_cntl_mm_op
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
gemm4m_kc,
gemm4m_ukrs,
NULL,
NULL,
NULL,
NULL,
trmm4m_r_cntl_op_bp,
NULL,
NULL );
// Create control tree object for very large problem via multiple
// general problems (right side).
trmm4m_r_cntl_vl_mm
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
gemm4m_nc,
gemm4m_ukrs,
NULL,
NULL,
NULL,
NULL,
trmm4m_r_cntl_mm_op,
NULL,
NULL );
// Alias the "master" trmm control trees to shorter names.
trmm4m_l_cntl = trmm4m_l_cntl_vl_mm;
trmm4m_r_cntl = trmm4m_r_cntl_vl_mm;
}
/*
  Releases every control tree node created by bli_trmm4m_cntl_init().

  trmm4m_l_cntl and trmm4m_r_cntl are only aliases of the respective
  vl_mm nodes, so they are not freed separately. gemm4m_cntl_bp_ke is
  merely referenced by the trmm trees and is not freed here. The global
  pointers themselves are left unchanged.
*/
void bli_trmm4m_cntl_finalize()
{
    // Release order: the four packm nodes, the shared leaf, then the
    // left-side nodes followed by the right-side nodes.
    void* nodes[] =
    {
        trmm4m_l_packa_cntl,
        trmm4m_l_packb_cntl,
        trmm4m_r_packa_cntl,
        trmm4m_r_packb_cntl,
        trmm4m_cntl_bp_ke,
        trmm4m_l_cntl_op_bp,
        trmm4m_l_cntl_mm_op,
        trmm4m_l_cntl_vl_mm,
        trmm4m_r_cntl_op_bp,
        trmm4m_r_cntl_mm_op,
        trmm4m_r_cntl_vl_mm
    };
    const int n_nodes = ( int )( sizeof( nodes ) / sizeof( nodes[0] ) );
    int       i;

    for ( i = 0; i < n_nodes; ++i )
    {
        bli_cntl_obj_free( nodes[i] );
    }
}

View File

@@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \
needs to occur in units of real values. The value computed
here is divided into the complex pointer offset to cause the
pointer to be advanced by the correct value. */ \
if ( bli_is_4m_packed( schema_a ) || \
bli_is_3m_packed( schema_a ) || \
if ( bli_is_4mi_packed( schema_a ) || \
bli_is_3mi_packed( schema_a ) || \
bli_is_rih_packed( schema_a ) ) off_scl = 2; \
else off_scl = 1; \
\
@@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \
summed-only, we need to scale the computed panel sizes by 1/2
to compensate for the fact that the pointer arithmetic occurs
in terms of complex elements rather than real elements. */ \
if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \
else { ss_a_num = 1; ss_a_den = 1; } \
\

View File

@@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \
needs to occur in units of real values. The value computed
here is divided into the complex pointer offset to cause the
pointer to be advanced by the correct value. */ \
if ( bli_is_4m_packed( schema_a ) || \
bli_is_3m_packed( schema_a ) || \
if ( bli_is_4mi_packed( schema_a ) || \
bli_is_3mi_packed( schema_a ) || \
bli_is_rih_packed( schema_a ) ) off_scl = 2; \
else off_scl = 1; \
\
@@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \
summed-only, we need to scale the computed panel sizes by 1/2
to compensate for the fact that the pointer arithmetic occurs
in terms of complex elements rather than real elements. */ \
if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \
else { ss_a_num = 1; ss_a_den = 1; } \
\

View File

@@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \
needs to occur in units of real values. The value computed
here is divided into the complex pointer offset to cause the
pointer to be advanced by the correct value. */ \
if ( bli_is_4m_packed( schema_b ) || \
bli_is_3m_packed( schema_b ) || \
if ( bli_is_4mi_packed( schema_b ) || \
bli_is_3mi_packed( schema_b ) || \
bli_is_rih_packed( schema_b ) ) off_scl = 2; \
else off_scl = 1; \
\
@@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \
summed-only, we need to scale the computed panel sizes by 1/2
to compensate for the fact that the pointer arithmetic occurs
in terms of complex elements rather than real elements. */ \
if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \
else { ss_b_num = 1; ss_b_den = 1; } \
\

View File

@@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \
needs to occur in units of real values. The value computed
here is divided into the complex pointer offset to cause the
pointer to be advanced by the correct value. */ \
if ( bli_is_4m_packed( schema_b ) || \
bli_is_3m_packed( schema_b ) || \
if ( bli_is_4mi_packed( schema_b ) || \
bli_is_3mi_packed( schema_b ) || \
bli_is_rih_packed( schema_b ) ) off_scl = 2; \
else off_scl = 1; \
\
@@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \
summed-only, we need to scale the computed panel sizes by 1/2
to compensate for the fact that the pointer arithmetic occurs
in terms of complex elements rather than real elements. */ \
if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \
else { ss_b_num = 1; ss_b_den = 1; } \
\

View File

@@ -120,7 +120,7 @@ void bli_trsm3m_cntl_init()
TRUE, // invert diagonal
TRUE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_3M,
BLIS_PACKED_ROW_PANELS_3MI,
BLIS_BUFFER_FOR_A_BLOCK );
trsm3m_l_packb_cntl
@@ -134,7 +134,7 @@ void bli_trsm3m_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_3M,
BLIS_PACKED_COL_PANELS_3MI,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm operations (right side).
@@ -147,7 +147,7 @@ void bli_trsm3m_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_3M,
BLIS_PACKED_ROW_PANELS_3MI,
BLIS_BUFFER_FOR_A_BLOCK );
trsm3m_r_packb_cntl
@@ -159,7 +159,7 @@ void bli_trsm3m_cntl_init()
TRUE, // invert diagonal
FALSE, // reverse iteration if upper?
TRUE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_3M,
BLIS_PACKED_COL_PANELS_3MI,
BLIS_BUFFER_FOR_B_PANEL );

View File

@@ -120,7 +120,7 @@ void bli_trsm4m_cntl_init()
TRUE, // invert diagonal
TRUE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_4M,
BLIS_PACKED_ROW_PANELS_4MI,
BLIS_BUFFER_FOR_A_BLOCK );
trsm4m_l_packb_cntl
@@ -134,7 +134,7 @@ void bli_trsm4m_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_4M,
BLIS_PACKED_COL_PANELS_4MI,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm operations (right side).
@@ -147,7 +147,7 @@ void bli_trsm4m_cntl_init()
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS_4M,
BLIS_PACKED_ROW_PANELS_4MI,
BLIS_BUFFER_FOR_A_BLOCK );
trsm4m_r_packb_cntl
@@ -159,7 +159,7 @@ void bli_trsm4m_cntl_init()
TRUE, // invert diagonal
FALSE, // reverse iteration if upper?
TRUE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS_4M,
BLIS_PACKED_COL_PANELS_4MI,
BLIS_BUFFER_FOR_B_PANEL );

View File

@@ -255,8 +255,8 @@ void PASTEMAC(ch,varname)( \
needs to occur in units of real values. The value computed
here is divided into the complex pointer offset to cause the
pointer to be advanced by the correct value. */ \
if ( bli_is_4m_packed( schema_a ) || \
bli_is_3m_packed( schema_a ) || \
if ( bli_is_4mi_packed( schema_a ) || \
bli_is_3mi_packed( schema_a ) || \
bli_is_rih_packed( schema_a ) ) off_scl = 2; \
else off_scl = 1; \
\
@@ -266,8 +266,8 @@ void PASTEMAC(ch,varname)( \
packing formats are not applicable here since trsm is a two-
operand operation only (unlike trmm, which is capable of three-
operand). */ \
if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
else { ss_a_num = 1; ss_a_den = 1; } \
if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
else { ss_a_num = 1; ss_a_den = 1; } \
\
/* If there is a zero region above where the diagonal of A intersects the
left edge of the block, adjust the pointer to C and treat this case as

View File

@@ -256,8 +256,8 @@ void PASTEMAC(ch,varname)( \
needs to occur in units of real values. The value computed
here is divided into the complex pointer offset to cause the
pointer to be advanced by the correct value. */ \
if ( bli_is_4m_packed( schema_a ) || \
bli_is_3m_packed( schema_a ) || \
if ( bli_is_4mi_packed( schema_a ) || \
bli_is_3mi_packed( schema_a ) || \
bli_is_rih_packed( schema_a ) ) off_scl = 2; \
else off_scl = 1; \
\
@@ -267,8 +267,8 @@ void PASTEMAC(ch,varname)( \
packing formats are not applicable here since trsm is a two-
operand operation only (unlike trmm, which is capable of three-
operand). */ \
if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
else { ss_a_num = 1; ss_a_den = 1; } \
if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
else { ss_a_num = 1; ss_a_den = 1; } \
\
/* If there is a zero region to the left of where the diagonal of A
intersects the top edge of the block, adjust the pointer to B and

View File

@@ -264,8 +264,8 @@ void PASTEMAC(ch,varname)( \
needs to occur in units of real values. The value computed
here is divided into the complex pointer offset to cause the
pointer to be advanced by the correct value. */ \
if ( bli_is_4m_packed( schema_b ) || \
bli_is_3m_packed( schema_b ) || \
if ( bli_is_4mi_packed( schema_b ) || \
bli_is_3mi_packed( schema_b ) || \
bli_is_rih_packed( schema_b ) ) off_scl = 2; \
else off_scl = 1; \
\
@@ -275,8 +275,8 @@ void PASTEMAC(ch,varname)( \
packing formats are not applicable here since trsm is a two-
operand operation only (unlike trmm, which is capable of three-
operand). */ \
if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
else { ss_b_num = 1; ss_b_den = 1; } \
if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
else { ss_b_num = 1; ss_b_den = 1; } \
\
/* If there is a zero region above where the diagonal of B intersects
the left edge of the panel, adjust the pointer to A and treat this

View File

@@ -263,8 +263,8 @@ void PASTEMAC(ch,varname)( \
needs to occur in units of real values. The value computed
here is divided into the complex pointer offset to cause the
pointer to be advanced by the correct value. */ \
if ( bli_is_4m_packed( schema_b ) || \
bli_is_3m_packed( schema_b ) || \
if ( bli_is_4mi_packed( schema_b ) || \
bli_is_3mi_packed( schema_b ) || \
bli_is_rih_packed( schema_b ) ) off_scl = 2; \
else off_scl = 1; \
\
@@ -274,8 +274,8 @@ void PASTEMAC(ch,varname)( \
packing formats are not applicable here since trsm is a two-
operand operation only (unlike trmm, which is capable of three-
operand). */ \
if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
else { ss_b_num = 1; ss_b_den = 1; } \
if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
else { ss_b_num = 1; ss_b_den = 1; } \
\
/* If there is a zero region to the left of where the diagonal of B
intersects the top edge of the panel, adjust the pointer to C and

View File

@@ -213,13 +213,13 @@
\
( ( (obj).info & BLIS_PACK_PANEL_BIT ) )
// Query whether an object's pack format sub-field (its info bits masked by
// BLIS_PACK_FORMAT_BITS) holds the 4m interleaved format value.
#define bli_obj_is_4m_packed( obj ) \
#define bli_obj_is_4mi_packed( obj ) \
\
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4M )
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4MI )
// Same query for the 3m interleaved format value.
#define bli_obj_is_3m_packed( obj ) \
#define bli_obj_is_3mi_packed( obj ) \
\
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3M )
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3MI )
#define bli_obj_is_ro_packed( obj ) \
\

View File

@@ -521,13 +521,13 @@
\
( ( schema & BLIS_PACK_PANEL_BIT ) )
// Query whether a pack schema value's format sub-field (the bits selected by
// BLIS_PACK_FORMAT_BITS) equals the 4m interleaved format value.
// NOTE: the schema argument is expanded without surrounding parentheses, so
// pass a plain identifier or an already-parenthesized expression; an argument
// such as a | b would bind incorrectly against the & in the expansion.
#define bli_is_4m_packed( schema ) \
#define bli_is_4mi_packed( schema ) \
\
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4M )
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4MI )
// Same query for the 3m interleaved format value; the same caveat about the
// unparenthesized schema argument applies.
#define bli_is_3m_packed( schema ) \
#define bli_is_3mi_packed( schema ) \
\
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3M )
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3MI )
#define bli_is_ro_packed( schema ) \
\

View File

@@ -198,35 +198,35 @@ typedef dcomplex f77_dcomplex;
- 13: domain (0 == real, 1 == complex)
- 14: precision (0 == single, 1 == double)
- 15: unused
21 ~ 16 Packed type/status
- 000000: not packed
- 100000: packed (unspecified; by rows, columns, or vector)
- 100000: packed by rows
- 100001: packed by columns
- 100010: packed by row panels
- 100011: packed by column panels
- 100110: packed by 4m row panels
- 100111: packed by 4m column panels
- 101010: packed by 3m row panels
- 101011: packed by 3m column panels
- 110010: packed real-only row panels
- 110011: packed real-only column panels
- 110110: packed imag-only row panels
- 110111: packed imag-only column panels
- 111010: packed real+imag row panels
- 111011: packed real+imag column panels
22 Packed panel order if upper-stored
22 ~ 16 Packed type/status
- 0 0000 00: not packed
- 1 0000 00: packed (unspecified; by rows, columns, or vector)
- 1 0000 00: packed by rows
- 1 0000 01: packed by columns
- 1 0000 10: packed by row panels
- 1 0000 11: packed by column panels
- 1 0001 10: packed by 4m interleaved row panels
- 1 0001 11: packed by 4m interleaved column panels
- 1 0010 10: packed by 3m interleaved row panels
- 1 0010 11: packed by 3m interleaved column panels
- 1 0101 10: packed real-only row panels
- 1 0101 11: packed real-only column panels
- 1 0110 10: packed imag-only row panels
- 1 0110 11: packed imag-only column panels
- 1 0111 10: packed real+imag row panels
- 1 0111 11: packed real+imag column panels
23 Packed panel order if upper-stored
- 0 == forward order if upper
- 1 == reverse order if upper
23 Packed panel order if lower-stored
24 Packed panel order if lower-stored
- 0 == forward order if lower
- 1 == reverse order if lower
25 ~ 24 Packed buffer type
26 ~ 25 Packed buffer type
- 0 == block of A
- 1 == panel of B
- 2 == panel of C
- 3 == general use
27 ~ 26 Structure type
28 ~ 27 Structure type
- 0 == general
- 1 == Hermitian
- 2 == symmetric
@@ -251,11 +251,11 @@ typedef dcomplex f77_dcomplex;
// Bit offsets of the low pack schema sub-fields within the info field: the
// row/column bit (16), the panel bit (17), and the start of the pack format
// field (18).
#define BLIS_PACK_RC_SHIFT 16
#define BLIS_PACK_PANEL_SHIFT 17
#define BLIS_PACK_FORMAT_SHIFT 18
// Offsets of the fields located above the pack format field. Their positions
// follow from the width of that field (see BLIS_PACK_FORMAT_BITS): a 3-bit
// format field puts the pack bit at 21, a 4-bit format field puts it at 22,
// and every later field moves up by the same amount.
#define BLIS_PACK_SHIFT 21
#define BLIS_PACK_REV_IF_UPPER_SHIFT 22
#define BLIS_PACK_REV_IF_LOWER_SHIFT 23
#define BLIS_PACK_BUFFER_SHIFT 24
#define BLIS_STRUC_SHIFT 26
#define BLIS_PACK_SHIFT 22
#define BLIS_PACK_REV_IF_UPPER_SHIFT 23
#define BLIS_PACK_REV_IF_LOWER_SHIFT 24
#define BLIS_PACK_BUFFER_SHIFT 25
#define BLIS_STRUC_SHIFT 27
//
// -- BLIS info bit field masks ------------------------------------------------
@@ -275,10 +275,10 @@ typedef dcomplex f77_dcomplex;
// Masks are built by shifting a field-width pattern (0x1 for one bit, 0x7 for
// three bits, and so on) up to the field's offset.
#define BLIS_INVERT_DIAG_BIT ( 0x1 << BLIS_INVERT_DIAG_SHIFT )
#define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT )
#define BLIS_EXECUTION_DT_BITS ( 0x7 << BLIS_EXECUTION_DT_SHIFT )
// Whole pack schema mask: 0x3F spans six bits, 0x7F spans seven. The seven-bit
// form matches the "22 ~ 16" range of the info layout table, assuming
// BLIS_PACK_SCHEMA_SHIFT is 16 (its definition is not shown here) -- TODO confirm.
#define BLIS_PACK_SCHEMA_BITS ( 0x3F << BLIS_PACK_SCHEMA_SHIFT )
#define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT )
#define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT )
#define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT )
// Pack format sub-field mask: 0x7 selects a 3-bit field, 0xF a 4-bit field,
// both starting at BLIS_PACK_FORMAT_SHIFT.
#define BLIS_PACK_FORMAT_BITS ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_PACK_FORMAT_BITS ( 0xF << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_PACK_BIT ( 0x1 << BLIS_PACK_SHIFT )
#define BLIS_PACK_REV_IF_UPPER_BIT ( 0x1 << BLIS_PACK_REV_IF_UPPER_SHIFT )
#define BLIS_PACK_REV_IF_LOWER_BIT ( 0x1 << BLIS_PACK_REV_IF_LOWER_SHIFT )
@@ -290,61 +290,61 @@ typedef dcomplex f77_dcomplex;
// -- BLIS enumerated type value definitions -----------------------------------
//
#define BLIS_BITVAL_REAL 0x0
#define BLIS_BITVAL_COMPLEX BLIS_DOMAIN_BIT
#define BLIS_BITVAL_SINGLE_PREC 0x0
#define BLIS_BITVAL_DOUBLE_PREC BLIS_PRECISION_BIT
#define BLIS_BITVAL_FLOAT_TYPE 0x0
#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT
#define BLIS_BITVAL_DOUBLE_TYPE BLIS_PRECISION_BIT
#define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT )
#define BLIS_BITVAL_INT_TYPE 0x04
#define BLIS_BITVAL_CONST_TYPE 0x05
#define BLIS_BITVAL_NO_TRANS 0x0
#define BLIS_BITVAL_TRANS BLIS_TRANS_BIT
#define BLIS_BITVAL_NO_CONJ 0x0
#define BLIS_BITVAL_CONJ BLIS_CONJ_BIT
#define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT )
#define BLIS_BITVAL_ZEROS 0x0
#define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS
#define BLIS_BITVAL_NONUNIT_DIAG 0x0
#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT
#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT
#define BLIS_BITVAL_NOT_PACKED 0x0
#define BLIS_BITVAL_4M ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_3M ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_RO ( 0x5 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_IO ( 0x6 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_RPI ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_PACKED_UNSPEC BLIS_PACK_BIT
#define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT )
#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_4M ( BLIS_PACK_BIT | BLIS_BITVAL_4M | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_4M ( BLIS_PACK_BIT | BLIS_BITVAL_4M | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_3M ( BLIS_PACK_BIT | BLIS_BITVAL_3M | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_3M ( BLIS_PACK_BIT | BLIS_BITVAL_3M | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_REAL 0x0
#define BLIS_BITVAL_COMPLEX BLIS_DOMAIN_BIT
#define BLIS_BITVAL_SINGLE_PREC 0x0
#define BLIS_BITVAL_DOUBLE_PREC BLIS_PRECISION_BIT
#define BLIS_BITVAL_FLOAT_TYPE 0x0
#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT
#define BLIS_BITVAL_DOUBLE_TYPE BLIS_PRECISION_BIT
#define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT )
#define BLIS_BITVAL_INT_TYPE 0x04
#define BLIS_BITVAL_CONST_TYPE 0x05
#define BLIS_BITVAL_NO_TRANS 0x0
#define BLIS_BITVAL_TRANS BLIS_TRANS_BIT
#define BLIS_BITVAL_NO_CONJ 0x0
#define BLIS_BITVAL_CONJ BLIS_CONJ_BIT
#define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT )
#define BLIS_BITVAL_ZEROS 0x0
#define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT )
#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS
#define BLIS_BITVAL_NONUNIT_DIAG 0x0
#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT
#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT
#define BLIS_BITVAL_NOT_PACKED 0x0
// Values of the pack format sub-field, positioned at BLIS_PACK_FORMAT_SHIFT:
// 0x1 = 4m interleaved, 0x2 = 3m interleaved, 0x5 = real-only,
// 0x6 = imag-only, 0x7 = real+imag.
#define BLIS_BITVAL_4MI ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_3MI ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_RO ( 0x5 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_IO ( 0x6 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_RPI ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
// Complete pack schema values. Each one ORs BLIS_PACK_BIT with the applicable
// pieces: BLIS_PACK_PANEL_BIT for panel packing, BLIS_PACK_RC_BIT for the
// column variants (row variants leave it clear), and a format value from the
// list above. Note that the "unspecified" and "rows" schemas are both just
// BLIS_PACK_BIT and are therefore indistinguishable by value.
#define BLIS_BITVAL_PACKED_UNSPEC BLIS_PACK_BIT
#define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT )
#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
#define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
#define BLIS_BITVAL_PACK_REV_IF_LOWER BLIS_PACK_REV_IF_LOWER_BIT
#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK 0x0
#define BLIS_BITVAL_BUFFER_FOR_B_PANEL ( 0x1 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_C_PANEL ( 0x2 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_GEN_USE ( 0x3 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_GENERAL 0x0
#define BLIS_BITVAL_HERMITIAN ( 0x1 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_SYMMETRIC ( 0x2 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_TRIANGULAR ( 0x3 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
#define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
#define BLIS_BITVAL_PACK_REV_IF_LOWER BLIS_PACK_REV_IF_LOWER_BIT
#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK 0x0
#define BLIS_BITVAL_BUFFER_FOR_B_PANEL ( 0x1 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_C_PANEL ( 0x2 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_BUFFER_FOR_GEN_USE ( 0x3 << BLIS_PACK_BUFFER_SHIFT )
#define BLIS_BITVAL_GENERAL 0x0
#define BLIS_BITVAL_HERMITIAN ( 0x1 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_SYMMETRIC ( 0x2 << BLIS_STRUC_SHIFT )
#define BLIS_BITVAL_TRIANGULAR ( 0x3 << BLIS_STRUC_SHIFT )
//
@@ -431,21 +431,21 @@ typedef enum
// pack_t: the set of pack schemas. Every enumerator takes its value directly
// from the corresponding BLIS_BITVAL_PACKED_* (or BLIS_BITVAL_NOT_PACKED)
// constant, so the enumerator values are the bit patterns themselves.
typedef enum
{
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
BLIS_PACKED_ROW_PANELS_4M = BLIS_BITVAL_PACKED_ROW_PANELS_4M,
BLIS_PACKED_COL_PANELS_4M = BLIS_BITVAL_PACKED_COL_PANELS_4M,
BLIS_PACKED_ROW_PANELS_3M = BLIS_BITVAL_PACKED_ROW_PANELS_3M,
BLIS_PACKED_COL_PANELS_3M = BLIS_BITVAL_PACKED_COL_PANELS_3M,
BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO,
BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO,
BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED,
BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC,
// BLIS_PACKED_VECTOR is an alias: it is given the same value as
// BLIS_PACKED_UNSPEC.
BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC,
BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS,
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
// The _4MI / _3MI suffixes denote the interleaved 4m and 3m panel formats.
BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI,
BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI,
BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI,
BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI,
BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO,
BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO,
BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI,
BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI,
} pack_t;