Files
blis/frame/1m/packm/bli_packm_struc_cxk_3mis.c
Field G. Van Zee 4b36e85be9 Converted function-like macros to static functions.
Details:
- Converted most C preprocessor macros in bli_param_macro_defs.h and
  bli_obj_macro_defs.h to static functions.
- Reshuffled some functions/macros to bli_misc_macro_defs.h and also
  between bli_param_macro_defs.h and bli_obj_macro_defs.h.
- Changed obj_t-initializing macros in bli_type_defs.h to static
  functions.
- Removed some old references to BLIS_TWO and BLIS_MINUS_TWO from
  bli_constants.h.
- Whitespace changes in select files (four spaces to single tab).
2018-05-08 14:26:30 -05:00

792 lines
21 KiB
C

/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
/*
   packm_struc_cxk_3mis

   Type-generic template (instantiated below through
   INSERT_GENTFUNCCO_BASIC) that packs a single micro-panel of c into p.

   The work is done in three stages:
     1. Dispatch on strucc: general panels go straight to the pack kernel
        front-end (kername); Hermitian/symmetric and triangular panels are
        handed to packm_herm_cxk_3mis and packm_tri_cxk_3mis respectively.
     2. If the panel is smaller than m_panel_max x n_panel_max, the unused
        bottom rows and/or right columns are overwritten with zeros.
     3. For triangular panels that are short in both dimensions, the
        diagonal of the bottom-right padded corner is set to one.

   p is addressed here as an array of ctype_r values holding three
   sub-panels located at element offsets 0, is_p and 2*is_p.
*/
#undef GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \
\
void PASTEMAC(ch,varname) \
     ( \
       struc_t         strucc, \
       doff_t          diagoffc, \
       diag_t          diagc, \
       uplo_t          uploc, \
       conj_t          conjc, \
       pack_t          schema, \
       bool_t          invdiag, \
       dim_t           m_panel, \
       dim_t           n_panel, \
       dim_t           m_panel_max, \
       dim_t           n_panel_max, \
       ctype* restrict kappa, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
       ctype* restrict p, inc_t rs_p, inc_t cs_p, \
                          inc_t is_p, \
       cntx_t*         cntx \
     ) \
{ \
	/* The pack schema decides which of the two panel extents is the short
	   "dimension" and which is the long "length", and therefore which
	   stride of c (and of p) plays the role of increment and which the
	   role of leading dimension. Column-packed panels take their
	   dimension from n; everything else takes it from m. */ \
	const bool_t   col_packed = bli_is_col_packed( schema ); \
	const dim_t    panel_dim  = ( col_packed ? n_panel : m_panel ); \
	const dim_t    panel_len  = ( col_packed ? m_panel : n_panel ); \
	const inc_t    incc       = ( col_packed ? cs_c    : rs_c    ); \
	const inc_t    ldc        = ( col_packed ? rs_c    : cs_c    ); \
	const inc_t    ldp        = ( col_packed ? rs_p    : cs_p    ); \
\
	/* Real-typed view of the packed buffer, used for all padding work. */ \
	ctype_r* const p_real     = ( ctype_r* )p; \
\
	/* Stage 1: pack the panel according to the structure of c. */ \
	if ( bli_is_general( strucc ) ) \
	{ \
		/* General panels need no special treatment; invoke the kernel
		   front-end directly. */ \
		PASTEMAC(ch,kername) \
		( \
		  conjc, \
		  panel_dim, \
		  panel_len, \
		  kappa, \
		  c, incc, ldc, \
		  p, is_p, ldp, \
		  cntx \
		); \
	} \
	else if ( bli_is_herm_or_symm( strucc ) ) \
	{ \
		/* Hermitian/symmetric panels are delegated to their helper. */ \
		PASTEMAC(ch,packm_herm_cxk_3mis) \
		( \
		  strucc, \
		  diagoffc, \
		  uploc, \
		  conjc, \
		  schema, \
		  m_panel, \
		  n_panel, \
		  m_panel_max, \
		  n_panel_max, \
		  panel_dim, \
		  panel_len, \
		  kappa, \
		  c, rs_c, cs_c, \
		     incc, ldc, \
		  p, rs_p, cs_p, \
		     is_p, ldp, \
		  cntx \
		); \
	} \
	else \
	{ \
		/* Anything else is treated as triangular and delegated to the
		   triangular helper. */ \
		PASTEMAC(ch,packm_tri_cxk_3mis) \
		( \
		  strucc, \
		  diagoffc, \
		  diagc, \
		  uploc, \
		  conjc, \
		  schema, \
		  invdiag, \
		  m_panel, \
		  n_panel, \
		  m_panel_max, \
		  n_panel_max, \
		  panel_dim, \
		  panel_len, \
		  kappa, \
		  c, rs_c, cs_c, \
		     incc, ldc, \
		  p, rs_p, cs_p, \
		     is_p, ldp, \
		  cntx \
		); \
	} \
\
	/* Stage 2: the packed region was sized with dimensions that may have
	   been rounded up to a multiple, which leaves unused rows at the
	   bottom and/or unused columns on the right. Clear whichever of those
	   regions exists, in each of the three sub-panels, so that the
	   micro-kernel never needs a separate code path for edge cases. */ \
	if ( m_panel != m_panel_max ) \
	{ \
		ctype_r* restrict zero_r = PASTEMAC(chr,0); \
		const dim_t       m_edge = m_panel_max - m_panel; \
		const dim_t       n_edge = n_panel_max; \
		dim_t             part; \
\
		/* part = 0, 1, 2 selects the sub-panels at 0, is_p and 2*is_p. */ \
		for ( part = 0; part < 3; ++part ) \
		{ \
			ctype_r* p_edge = p_real + part*is_p + m_panel*rs_p; \
\
			PASTEMAC(chr,setm) \
			( \
			  BLIS_NO_CONJUGATE, \
			  0, \
			  BLIS_NONUNIT_DIAG, \
			  BLIS_DENSE, \
			  m_edge, \
			  n_edge, \
			  zero_r, \
			  p_edge, rs_p, cs_p, \
			  cntx \
			); \
		} \
	} \
\
	if ( n_panel != n_panel_max ) \
	{ \
		ctype_r* restrict zero_r = PASTEMAC(chr,0); \
		const dim_t       m_edge = m_panel_max; \
		const dim_t       n_edge = n_panel_max - n_panel; \
		dim_t             part; \
\
		/* part = 0, 1, 2 selects the sub-panels at 0, is_p and 2*is_p. */ \
		for ( part = 0; part < 3; ++part ) \
		{ \
			ctype_r* p_edge = p_real + part*is_p + n_panel*cs_p; \
\
			PASTEMAC(chr,setm) \
			( \
			  BLIS_NO_CONJUGATE, \
			  0, \
			  BLIS_NONUNIT_DIAG, \
			  BLIS_DENSE, \
			  m_edge, \
			  n_edge, \
			  zero_r, \
			  p_edge, rs_p, cs_p, \
			  cntx \
			); \
		} \
	} \
\
	/* Stage 3: a triangular panel that is short in both directions must be
	   the bottom-right corner. Extend the diagonal into the padded corner
	   as an identity: ones in the sub-panel at offset 0, zeros in the
	   sub-panel at offset is_p.
	   NOTE: Strictly this is only required when packing for trsm, where
	   it keeps NaNs and Infs out of the computation. It is done for trmm
	   too; there the ones are multiplied by zeros from the zero-padded
	   region of the other operand, so they are harmless. */ \
	if ( bli_is_triangular( strucc ) && \
	     m_panel != m_panel_max && \
	     n_panel != n_panel_max ) \
	{ \
		ctype_r* restrict one_r  = PASTEMAC(chr,1); \
		ctype_r* restrict zero_r = PASTEMAC(chr,0); \
		const dim_t       m_br   = m_panel_max - m_panel; \
		const dim_t       n_br   = n_panel_max - n_panel; \
		ctype_r*          p_br_r = p_real +        m_panel*rs_p + n_panel*cs_p; \
		ctype_r*          p_br_i = p_real + is_p + m_panel*rs_p + n_panel*cs_p; \
\
		PASTEMAC(chr,setd) \
		( \
		  BLIS_NO_CONJUGATE, \
		  0, \
		  m_br, \
		  n_br, \
		  one_r, \
		  p_br_r, rs_p, cs_p, \
		  cntx \
		); \
		PASTEMAC(chr,setd) \
		( \
		  BLIS_NO_CONJUGATE, \
		  0, \
		  m_br, \
		  n_br, \
		  zero_r, \
		  p_br_i, rs_p, cs_p, \
		  cntx \
		); \
	} \
}
INSERT_GENTFUNCCO_BASIC( packm_struc_cxk_3mis, packm_cxk_3mis )
/*
   packm_herm_cxk_3mis

   Type-generic template (instantiated below through
   INSERT_GENTFUNCCO_BASIC with kername = packm_cxk_3mis) that packs one
   micro-panel of a Hermitian or symmetric matrix c into p. It is the
   helper that packm_struc_cxk_3mis calls when bli_is_herm_or_symm( strucc )
   holds.

   Parameters as used by this function:
     strucc        Only tested with bli_is_hermitian(), to decide whether
                   conjugation is toggled for data read from the opposite
                   triangle and whether the imaginary diagonal is zeroed.
     diagoffc      Diagonal offset used in the intersection/unstored tests
                   and to shift c toward the stored data.
     uploc         Which triangle of c is stored.
     conjc         Conjugation requested for c.
     schema        Only used to derive the row_stored/col_stored flags.
     m_panel,
     n_panel       Panel extents used in the diagonal tests.
     m_panel_max,
     n_panel_max   Not referenced in this function.
     panel_dim,
     panel_len     Short and long extents of the panel, forwarded to the
                   kernel.
     kappa         Scalar applied while packing.
     c, rs_c, cs_c Source matrix and its row/column strides.
     incc, ldc     Increment and leading dimension of c for the kernel.
     p, rs_p, cs_p Destination panel and its strides. p is addressed as
                   ctype_r storage.
     is_p, ldp     Offset between the sub-panels of p (the code touches
                   offsets 0, is_p and 2*is_p) and the leading dimension
                   of p.
     cntx          Context forwarded to every callee.

   If the panel does not intersect the diagonal it is packed with a single
   kernel call (after redirecting c when the panel is unstored). Otherwise
   the panel is packed as two pieces, p10 and p12, and the stored triangle
   of the diagonal block c11 is then written over the corresponding part
   of p.
*/
#undef GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \
\
void PASTEMAC(ch,varname) \
( \
struc_t strucc, \
doff_t diagoffc, \
uplo_t uploc, \
conj_t conjc, \
pack_t schema, \
dim_t m_panel, \
dim_t n_panel, \
dim_t m_panel_max, \
dim_t n_panel_max, \
dim_t panel_dim, \
dim_t panel_len, \
ctype* restrict kappa, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
inc_t incc, inc_t ldc, \
ctype* restrict p, inc_t rs_p, inc_t cs_p, \
inc_t is_p, inc_t ldp, \
cntx_t* cntx \
) \
{ \
doff_t diagoffc_abs; \
dim_t i, j; \
bool_t row_stored; \
bool_t col_stored; \
\
\
/* Create flags to indicate row or column storage. Note that the
schema bit that encodes row or column is describing the form of
micro-panel, not the storage in the micro-panel. Hence the
mismatch in "row" and "column" semantics. */ \
row_stored = bli_is_col_packed( schema ); \
col_stored = bli_is_row_packed( schema ); \
\
\
/* Handle the case where the micro-panel does NOT intersect the
diagonal separately from the case where it does intersect. */ \
if ( !bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) \
{ \
/* If the current panel is unstored, we need to make a few
adjustments so we refer to the data where it is actually
stored, also taking conjugation into account. (Note this
implicitly assumes we are operating on a dense panel
within a larger symmetric or Hermitian matrix, since a
general matrix would not contain any unstored region.)
Concretely: c is moved by diagoffc columns and -diagoffc rows,
incc and ldc are exchanged, and for Hermitian c the conjugation
flag is toggled. */ \
if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \
{ \
c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \
bli_swap_incs( &incc, &ldc ); \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( &conjc ); \
} \
\
/* Pack the full panel. */ \
PASTEMAC(ch,kername) \
( \
conjc, \
panel_dim, \
panel_len, \
kappa, \
c, incc, ldc, \
p, is_p, ldp, \
cntx \
); \
} \
else /* if ( bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) */ \
{ \
ctype_r* restrict p_r = ( ctype_r* )p; \
/* p_r is the real-typed view of p used for all offsets below. */ \
ctype_r* restrict one_r = PASTEMAC(chr,1); \
ctype_r* restrict minus_one_r = PASTEMAC(chr,m1); \
/* Source, destination, extents, strides, diagonal offset and
conjugation for the first kernel call (p10). */ \
ctype* restrict c10; \
ctype_r* restrict p10; \
dim_t p10_dim, p10_len; \
inc_t incc10, ldc10; \
doff_t diagoffc10; \
conj_t conjc10; \
/* The same set of values for the second kernel call (p12). */ \
ctype* restrict c12; \
ctype_r* restrict p12; \
dim_t p12_dim, p12_len; \
inc_t incc12, ldc12; \
doff_t diagoffc12; \
conj_t conjc12; \
\
/* Sanity check. Diagonals should not intersect the short end of
a micro-panel. If they do, then the constraints on cache
blocksizes being a whole multiple of the register blocksizes
were somehow violated. */ \
if ( ( col_stored && diagoffc < 0 ) || \
( row_stored && diagoffc > 0 ) ) \
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
\
diagoffc_abs = bli_abs( diagoffc ); \
/* First case: p10 covers the leading diagoffc_abs positions along
the panel length and is read from c with the caller's strides; p12
covers the remainder and is read with incc and ldc exchanged. */ \
if ( ( row_stored && bli_is_upper( uploc ) ) || \
( col_stored && bli_is_lower( uploc ) ) ) \
{ \
p10_dim = panel_dim; \
p10_len = diagoffc_abs; \
p10 = p_r; \
c10 = c; \
incc10 = incc; \
ldc10 = ldc; \
conjc10 = conjc; \
/* Since j is set to p10_len == diagoffc_abs, diagoffc12 evaluates
to zero here, so the shift applied to c12 below adds nothing. */ \
p12_dim = panel_dim; \
p12_len = panel_len - p10_len; \
j = p10_len; \
diagoffc12 = diagoffc_abs - j; \
p12 = p_r + (j )*ldp; \
c12 = c + (j )*ldc; \
c12 = c12 + diagoffc12 * ( doff_t )cs_c + \
-diagoffc12 * ( doff_t )rs_c; \
incc12 = ldc; \
ldc12 = incc; \
conjc12 = conjc; \
/* For Hermitian c, the piece read with exchanged strides (p12) has
its conjugation toggled. */ \
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( &conjc12 ); \
} \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) \
Second case: p10 covers the leading diagoffc_abs + panel_dim
positions and is read from the shifted c with incc and ldc
exchanged; p12 covers the remainder and is read with the caller's
strides. */ \
{ \
p10_dim = panel_dim; \
p10_len = diagoffc_abs + panel_dim; \
diagoffc10 = diagoffc; \
p10 = p_r; \
c10 = c; \
c10 = c10 + diagoffc10 * ( doff_t )cs_c + \
-diagoffc10 * ( doff_t )rs_c; \
incc10 = ldc; \
ldc10 = incc; \
conjc10 = conjc; \
\
p12_dim = panel_dim; \
p12_len = panel_len - p10_len; \
j = p10_len; \
p12 = p_r + (j )*ldp; \
c12 = c + (j )*ldc; \
incc12 = incc; \
ldc12 = ldc; \
conjc12 = conjc; \
/* For Hermitian c, the piece read with exchanged strides (p10) has
its conjugation toggled. */ \
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( &conjc10 ); \
} \
\
/* Pack to p10. For upper storage, this includes the unstored
triangle of c11. */ \
PASTEMAC(ch,kername) \
( \
conjc10, \
p10_dim, \
p10_len, \
kappa, \
c10, incc10, ldc10, \
( ctype* )p10, is_p, ldp, \
cntx \
); \
\
/* Pack to p12. For lower storage, this includes the unstored
triangle of c11. */ \
PASTEMAC(ch,kername) \
( \
conjc12, \
p12_dim, \
p12_len, \
kappa, \
c12, incc12, ldc12, \
( ctype* )p12, is_p, ldp, \
cntx \
); \
\
/* Pack the stored triangle of c11 to p11. p11 is the panel_dim x
panel_dim block that starts diagoffc_abs leading dimensions into p;
it lies inside the region already written by the kernel calls above
(the start of p12 in the first case, the end of p10 in the second),
so this step must come after them. */ \
{ \
dim_t p11_m = panel_dim; \
dim_t p11_n = panel_dim; \
inc_t rs_c11 = 2*rs_c; \
inc_t cs_c11 = 2*cs_c; \
dim_t j2 = diagoffc_abs; \
ctype* c11 = ( ctype* )c + (j2 )*ldc; \
ctype_r* p11 = ( ctype_r* )p_r + (j2 )*ldp; \
ctype_r* c11_r = ( ctype_r* )c11; \
ctype_r* c11_i = ( ctype_r* )c11 + 1; \
ctype_r* p11_r = ( ctype_r* )p11; \
ctype_r* p11_i = ( ctype_r* )p11 + is_p; \
ctype_r* alpha_r = one_r; \
ctype_r* alpha_i = ( bli_is_conj( conjc ) ? minus_one_r : one_r ); \
ctype_r kappa_r = PASTEMAC(ch,real)( *kappa ); \
ctype_r kappa_i = PASTEMAC(ch,imag)( *kappa ); \
\
/* Copy the real part of the stored triangle of c11 to p11_r.
c11_r and c11_i address c11 in units of ctype_r, with the imaginary
part one ctype_r past the real part, which is why the strides of c
are doubled in rs_c11 and cs_c11. (This assumes each ctype holds
its real and imaginary parts as two adjacent ctype_r values.) */ \
PASTEMAC(chr,scal2m) \
( \
0, \
BLIS_NONUNIT_DIAG, \
uploc, \
BLIS_NO_TRANSPOSE, \
p11_m, \
p11_n, \
alpha_r, \
c11_r, rs_c11, cs_c11, \
p11_r, rs_p, cs_p, \
cntx \
); \
\
/* Copy the imaginary part of the stored triangle of c11 to p11_i,
scaling by -1 if conjugation on c was requested. */ \
PASTEMAC(chr,scal2m) \
( \
0, \
BLIS_NONUNIT_DIAG, \
uploc, \
BLIS_NO_TRANSPOSE, \
p11_m, \
p11_n, \
alpha_i, \
c11_i, rs_c11, cs_c11, \
p11_i, rs_p, cs_p, \
cntx \
); \
\
/* If source matrix c is Hermitian, we have to zero out the
imaginary components of the diagonal of p11 in case the
corresponding elements in c11 were not already zero. */ \
if ( bli_is_hermitian( strucc ) ) \
{ \
for ( i = 0; i < p11_m; ++i ) \
{ \
ctype_r* pi11_i = p11_i + (i )*rs_p + (i )*cs_p; \
\
PASTEMAC(chr,set0s)( *pi11_i ); \
} \
} \
\
/* Apply kappa to the part of p11 that corresponds to the stored
part of c11 that was copied above. The two scal2m calls used a
scalar of +/-1 only, so kappa has not yet been applied to this
triangle. */ \
if ( bli_is_upper( uploc ) ) \
{ \
PASTEMAC(ch,scalris_mxn_u) \
( \
0, \
p11_m, \
p11_n, \
&kappa_r, \
&kappa_i, \
p11_r, \
p11_i, rs_p, cs_p \
); \
} \
else \
{ \
PASTEMAC(ch,scalris_mxn_l) \
( \
0, \
p11_m, \
p11_n, \
&kappa_r, \
&kappa_i, \
p11_r, \
p11_i, rs_p, cs_p \
); \
} \
\
/* Update the p11 section of the ri panel. It simply needs
to contain the sum of p11_r + p11_i. The loop visits every
element of the p11_m x p11_n block, not only the stored
triangle. */ \
{ \
ctype_r* p11_rpi = p11_i + is_p; \
\
for ( j = 0; j < p11_n; ++j ) \
for ( i = 0; i < p11_m; ++i ) \
{ \
ctype_r* pi11_r = p11_r + (i )*rs_p + (j )*cs_p; \
ctype_r* pi11_i = p11_i + (i )*rs_p + (j )*cs_p; \
ctype_r* pi11_rpi = p11_rpi + (i )*rs_p + (j )*cs_p; \
\
PASTEMAC(chr,add3s) \
( \
*pi11_r, \
*pi11_i, \
*pi11_rpi \
); \
} \
} \
} \
} \
}
INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_3mis, packm_cxk_3mis )
/*
   packm_tri_cxk_3mis

   Type-generic template (instantiated below through
   INSERT_GENTFUNCCO_BASIC with kername = packm_cxk_3mis) that packs one
   micro-panel of a triangular matrix c into p.

   The panel is first packed in full by the kernel front-end and then
   adjusted in place:
     - for an implicit unit diagonal, the diagonal is overwritten with
       kappa (real part in the sub-panel at offset 0, imaginary part in
       the sub-panel at offset is_p) and the diagonal of the sub-panel at
       offset 2*is_p is refreshed with their sum;
     - if invdiag is TRUE, the diagonal of the first two sub-panels is
       inverted;
     - the region on the opposite side of the diagonal is zeroed in all
       three sub-panels.

   strucc, m_panel_max, n_panel_max, rs_c and cs_c are accepted for
   signature compatibility but are not referenced here.
*/
#undef GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \
\
void PASTEMAC(ch,varname) \
     ( \
       struc_t         strucc, \
       doff_t          diagoffp, \
       diag_t          diagc, \
       uplo_t          uploc, \
       conj_t          conjc, \
       pack_t          schema, \
       bool_t          invdiag, \
       dim_t           m_panel, \
       dim_t           n_panel, \
       dim_t           m_panel_max, \
       dim_t           n_panel_max, \
       dim_t           panel_dim, \
       dim_t           panel_len, \
       ctype* restrict kappa, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
                          inc_t incc, inc_t ldc, \
       ctype* restrict p, inc_t rs_p, inc_t cs_p, \
                          inc_t is_p, inc_t ldp, \
       cntx_t*         cntx \
     ) \
{ \
	/* Real-typed views of the three sub-panels held in p. */ \
	ctype_r* const part_r    = ( ctype_r* )p + 0; \
	ctype_r* const part_i    = ( ctype_r* )p + is_p; \
	ctype_r* const part_rpi  = ( ctype_r* )p + 2*is_p; \
\
	/* Locate the block that contains the diagonal: it begins
	   |diagoffp| leading dimensions into each sub-panel. */ \
	const dim_t    diag_skip = bli_abs( diagoffp ); \
	ctype_r* const diag_r    = part_r   + diag_skip*ldp; \
	ctype_r* const diag_i    = part_i   + diag_skip*ldp; \
	ctype_r* const diag_rpi  = part_rpi + diag_skip*ldp; \
\
	/* Extents of that block: a negative offset shortens m, a positive
	   offset shortens n, and a zero offset leaves both untouched. The
	   diagonal has as many elements as the smaller extent. */ \
	const dim_t    diag_m    = ( diagoffp < 0 ? m_panel - diag_skip : m_panel ); \
	const dim_t    diag_n    = ( diagoffp > 0 ? n_panel - diag_skip : n_panel ); \
	const dim_t    diag_len  = bli_min( diag_m, diag_n ); \
\
	/* Start by packing the whole panel as if it were dense. */ \
	PASTEMAC(ch,kername) \
	( \
	  conjc, \
	  panel_dim, \
	  panel_len, \
	  kappa, \
	  c, incc, ldc, \
	  p, is_p, ldp, \
	  cntx \
	); \
\
	/* An implicit unit diagonal in c becomes an explicit diagonal of
	   kappa in the packed panel. */ \
	if ( bli_is_unit_diag( diagc ) ) \
	{ \
		ctype_r kappa_r = PASTEMAC(ch,real)( *kappa ); \
		ctype_r kappa_i = PASTEMAC(ch,imag)( *kappa ); \
		dim_t   d; \
\
		PASTEMAC(chr,setd) \
		( \
		  BLIS_NO_CONJUGATE, \
		  diagoffp, \
		  m_panel, \
		  n_panel, \
		  &kappa_r, \
		  part_r, rs_p, cs_p, \
		  cntx \
		); \
		PASTEMAC(chr,setd) \
		( \
		  BLIS_NO_CONJUGATE, \
		  diagoffp, \
		  m_panel, \
		  n_panel, \
		  &kappa_i, \
		  part_i, rs_p, cs_p, \
		  cntx \
		); \
\
		/* Keep the third sub-panel consistent: each of its diagonal
		   elements must equal the sum of the matching elements of the
		   first two sub-panels. */ \
		for ( d = 0; d < diag_len; ++d ) \
		{ \
			ctype_r* elem_r   = diag_r   + d*rs_p + d*cs_p; \
			ctype_r* elem_i   = diag_i   + d*rs_p + d*cs_p; \
			ctype_r* elem_rpi = diag_rpi + d*rs_p + d*cs_p; \
\
			PASTEMAC(chr,add3s)( *elem_r, *elem_i, *elem_rpi ); \
		} \
	} \
\
	/* Invert the packed diagonal when asked to. The third sub-panel is
	   deliberately left alone: inverted diagonals are only consumed by
	   trsm, which does not read that part of the third sub-panel. */ \
	if ( invdiag == TRUE ) \
	{ \
		dim_t d; \
\
		for ( d = 0; d < diag_len; ++d ) \
		{ \
			ctype_r* elem_r = diag_r + d*rs_p + d*cs_p; \
			ctype_r* elem_i = diag_i + d*rs_p + d*cs_p; \
\
			PASTEMAC(ch,invertris)( *elem_r, *elem_i ); \
		} \
	} \
\
	/* Zero the region on the far side of the diagonal. That region is
	   addressed by flipping uploc and then moving the diagonal offset by
	   one so the flipped region excludes the diagonal itself. trsm does
	   not need this, because its micro-kernel never reads the unstored
	   region, but trmm does, because it runs on the gemm micro-kernel. */ \
	{ \
		ctype_r* restrict zero_r    = PASTEMAC(chr,0); \
		uplo_t            uplo_zero = uploc; \
		doff_t            doff_zero = diagoffp; \
		dim_t             part; \
\
		bli_toggle_uplo( &uplo_zero ); \
		bli_shift_diag_offset_to_shrink_uplo( uplo_zero, &doff_zero ); \
\
		/* part = 0, 1, 2 selects the sub-panels at 0, is_p and 2*is_p. */ \
		for ( part = 0; part < 3; ++part ) \
		{ \
			ctype_r* p_part = ( ctype_r* )p + part*is_p; \
\
			PASTEMAC(chr,setm) \
			( \
			  BLIS_NO_CONJUGATE, \
			  doff_zero, \
			  BLIS_NONUNIT_DIAG, \
			  uplo_zero, \
			  m_panel, \
			  n_panel, \
			  zero_r, \
			  p_part, rs_p, cs_p, \
			  cntx \
			); \
		} \
	} \
}
INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_3mis, packm_cxk_3mis )