mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Refactored packm variants.
Details: - Revised packm_blk_var2() and packm_blk_var3() by encapsulating the general, Hermitian/symmetric, and triangular panel-packing subproblems into separate functions: packm_gen_cxk(), packm_herm_cxk(), and packm_tri_cxk(), respectively. Also homogenized the packm code, as well as the new specialized packm_*_cxk() code, to further improve readability.
This commit is contained in:
@@ -46,4 +46,7 @@
|
||||
#include "bli_packm_blk_var3.h"
|
||||
|
||||
#include "bli_packm_cxk.h"
|
||||
#include "bli_packm_gen_cxk.h"
|
||||
#include "bli_packm_herm_cxk.h"
|
||||
#include "bli_packm_tri_cxk.h"
|
||||
|
||||
|
||||
@@ -131,53 +131,29 @@ void PASTEMAC(ch,varname )( \
|
||||
dim_t pd_p, inc_t ps_p \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict c_cast = c; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
\
|
||||
ctype* restrict c_begin; \
|
||||
ctype* restrict p_begin; \
|
||||
\
|
||||
dim_t iter_dim; \
|
||||
dim_t num_iter; \
|
||||
dim_t it, ic, ip; \
|
||||
dim_t i, j; \
|
||||
\
|
||||
dim_t ic0, ip0; \
|
||||
dim_t ic_inc, ip_inc; \
|
||||
dim_t panel_dim; \
|
||||
dim_t panel_len; \
|
||||
doff_t diagoffc_i; \
|
||||
doff_t diagoffc_inc; \
|
||||
doff_t diagoffc_i_abs; \
|
||||
\
|
||||
dim_t panel_dim_i; \
|
||||
inc_t vs_c; \
|
||||
inc_t incc, ldc; \
|
||||
inc_t ldp; \
|
||||
dim_t* m_panel; \
|
||||
dim_t* n_panel; \
|
||||
dim_t m_panel_max; \
|
||||
dim_t n_panel_max; \
|
||||
conj_t conjc; \
|
||||
\
|
||||
ctype* restrict c10; \
|
||||
ctype* restrict p10; \
|
||||
dim_t p10_dim, p10_len; \
|
||||
inc_t incc10, ldc10; \
|
||||
doff_t diagoffc10; \
|
||||
conj_t conjc10; \
|
||||
\
|
||||
ctype* restrict c12; \
|
||||
ctype* restrict p12; \
|
||||
dim_t p12_dim, p12_len; \
|
||||
inc_t incc12, ldc12; \
|
||||
doff_t diagoffc12; \
|
||||
conj_t conjc12; \
|
||||
\
|
||||
ctype* restrict c11; \
|
||||
ctype* restrict p11; \
|
||||
dim_t p11_m; \
|
||||
dim_t p11_n; \
|
||||
inc_t rs_p11, cs_p11; \
|
||||
\
|
||||
\
|
||||
/* Extract the conjugation bit from the transposition argument. */ \
|
||||
@@ -200,37 +176,25 @@ void PASTEMAC(ch,varname )( \
|
||||
{ \
|
||||
/* Prepare to pack to row-stored column panels. */ \
|
||||
iter_dim = n; \
|
||||
panel_len = m; \
|
||||
panel_dim = pd_p; \
|
||||
incc = cs_c; \
|
||||
ldc = rs_c; \
|
||||
vs_c = cs_c; \
|
||||
diagoffc_inc = -( doff_t)panel_dim; \
|
||||
ldp = rs_p; \
|
||||
m_panel = &m; \
|
||||
n_panel = &panel_dim_i; \
|
||||
m_panel_max = m_max; \
|
||||
n_panel_max = panel_dim; \
|
||||
rs_p11 = rs_p; \
|
||||
cs_p11 = 1; \
|
||||
} \
|
||||
else /* if ( bli_is_col_stored_f( rs_p, cs_p ) ) */ \
|
||||
{ \
|
||||
/* Prepare to pack to column-stored row panels. */ \
|
||||
iter_dim = m; \
|
||||
panel_len = n; \
|
||||
panel_dim = pd_p; \
|
||||
incc = rs_c; \
|
||||
ldc = cs_c; \
|
||||
vs_c = rs_c; \
|
||||
diagoffc_inc = ( doff_t )panel_dim; \
|
||||
ldp = cs_p; \
|
||||
m_panel = &panel_dim_i; \
|
||||
n_panel = &n; \
|
||||
m_panel_max = panel_dim; \
|
||||
n_panel_max = n_max; \
|
||||
rs_p11 = 1; \
|
||||
cs_p11 = cs_p; \
|
||||
} \
|
||||
\
|
||||
/* Compute the total number of iterations we'll need. */ \
|
||||
@@ -256,216 +220,36 @@ void PASTEMAC(ch,varname )( \
|
||||
\
|
||||
/* If the current panel intersects the diagonal and C is either
|
||||
upper- or lower-stored, then we assume C is symmetric or
|
||||
Hermitian and that it must be densified (note we don't even
|
||||
bother passing in a densify parameter), in which case we pack
|
||||
the panel in three stages.
|
||||
Hermitian and that it must be densified.
|
||||
Otherwise, we pack the panel all at once. */ \
|
||||
if ( bli_intersects_diag_n( diagoffc_i, *m_panel, *n_panel ) && \
|
||||
bli_is_upper_or_lower( uploc ) ) \
|
||||
{ \
|
||||
diagoffc_i_abs = bli_abs( diagoffc_i ); \
|
||||
\
|
||||
/* Sanity check. Diagonals should not intersect the short end of
|
||||
a micro-panel. If they do, then somehow the constraints on
|
||||
cache blocksizes being a whole multiple of the register
|
||||
blocksizes was somehow violated. */ \
|
||||
if ( ( bli_is_col_stored_f( rs_p, cs_p ) && diagoffc_i < 0 ) || \
|
||||
( bli_is_row_stored_f( rs_p, cs_p ) && diagoffc_i > 0 ) ) \
|
||||
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
|
||||
\
|
||||
if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) || \
|
||||
( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) ) \
|
||||
{ \
|
||||
p10_dim = panel_dim_i; \
|
||||
p10_len = diagoffc_i_abs; \
|
||||
p10 = p_begin; \
|
||||
c10 = c_begin; \
|
||||
incc10 = incc; \
|
||||
ldc10 = ldc; \
|
||||
conjc10 = conjc; \
|
||||
\
|
||||
p12_dim = panel_dim_i; \
|
||||
p12_len = panel_len - p10_len; \
|
||||
j = p10_len; \
|
||||
diagoffc12 = diagoffc_i_abs - j; \
|
||||
p12 = p_begin + (j )*ldp; \
|
||||
c12 = c_begin + (j )*ldc; \
|
||||
c12 = c12 + diagoffc12 * ( doff_t )cs_c + \
|
||||
-diagoffc12 * ( doff_t )rs_c; \
|
||||
incc12 = ldc; \
|
||||
ldc12 = incc; \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
p11_m = panel_dim_i; \
|
||||
p11_n = panel_dim_i; \
|
||||
j = diagoffc_i_abs; \
|
||||
p11 = p_begin + (j )*ldp; \
|
||||
c11 = c_begin + (j )*ldc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc12 ); \
|
||||
} \
|
||||
else /* if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) || \
|
||||
( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) ) */ \
|
||||
{ \
|
||||
p10_dim = panel_dim_i; \
|
||||
p10_len = diagoffc_i_abs + panel_dim_i; \
|
||||
diagoffc10 = diagoffc_i; \
|
||||
p10 = p_begin; \
|
||||
c10 = c_begin; \
|
||||
c10 = c10 + diagoffc10 * ( doff_t )cs_c + \
|
||||
-diagoffc10 * ( doff_t )rs_c; \
|
||||
incc10 = ldc; \
|
||||
ldc10 = incc; \
|
||||
conjc10 = conjc; \
|
||||
\
|
||||
p12_dim = panel_dim_i; \
|
||||
p12_len = panel_len - p10_len; \
|
||||
j = p10_len; \
|
||||
p12 = p_begin + (j )*ldp; \
|
||||
c12 = c_begin + (j )*ldc; \
|
||||
incc12 = incc; \
|
||||
ldc12 = ldc; \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
p11_m = panel_dim_i; \
|
||||
p11_n = panel_dim_i; \
|
||||
j = diagoffc_i_abs; \
|
||||
p11 = p_begin + (j )*ldp; \
|
||||
c11 = c_begin + (j )*ldc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc10 ); \
|
||||
} \
|
||||
\
|
||||
/* Pack to P10. For upper storage, this includes the unstored
|
||||
triangle of C11. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc10, \
|
||||
p10_dim, \
|
||||
p10_len, \
|
||||
kappa_cast, \
|
||||
c10, incc10, ldc10, \
|
||||
p10, ldp ); \
|
||||
\
|
||||
/* Pack to P12. For lower storage, this includes the unstored
|
||||
triangle of C11. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc12, \
|
||||
p12_dim, \
|
||||
p12_len, \
|
||||
kappa_cast, \
|
||||
c12, incc12, ldc12, \
|
||||
p12, ldp ); \
|
||||
\
|
||||
/* Pack the stored triangle of C11 to P11. */ \
|
||||
PASTEMAC3(ch,ch,ch,scal2m_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
p11_m, \
|
||||
p11_n, \
|
||||
kappa_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
p11, rs_p11, cs_p11 ); \
|
||||
\
|
||||
/* If source matrix C is Hermitian, we have to zero out the
|
||||
imaginary components of the diagonal of P11 in case the
|
||||
corresponding elements in C11 were not already zero. */ \
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
{ \
|
||||
/* NOTE: We can directly increment p11 since we are done
|
||||
using p11 for the remainder of the function. */ \
|
||||
for ( i = 0; i < p11_m; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,seti0s)( *p11 ); \
|
||||
\
|
||||
p11 += rs_p11 + cs_p11; \
|
||||
} \
|
||||
} \
|
||||
PASTEMAC(ch,packm_herm_cxk)( strucc, \
|
||||
diagoffc_i, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
*m_panel, \
|
||||
*n_panel, \
|
||||
m_panel_max, \
|
||||
n_panel_max, \
|
||||
kappa, \
|
||||
c_begin, rs_c, cs_c, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
/* Note that the following code executes if the current panel either:
|
||||
- does not intersect the diagonal, or
|
||||
- does intersect the diagonal, BUT the matrix is general
|
||||
which means the entire current panel can be copied at once. */ \
|
||||
\
|
||||
/* We use some c10-specific variables here because we might need
|
||||
to change them if the current panel is unstored. (The values
|
||||
below are used if the current panel is stored.) */ \
|
||||
c10 = c_begin; \
|
||||
incc10 = incc; \
|
||||
ldc10 = ldc; \
|
||||
conjc10 = conjc; \
|
||||
\
|
||||
/* If the current panel is unstored, we need to make a few
|
||||
adjustments so we refer to the data where it is actually
|
||||
stored, and so we take conjugation into account. (Note
|
||||
this implicitly assumes we are operating on a symmetric or
|
||||
Hermitian matrix, since a general matrix would not contain
|
||||
any unstored region.) */ \
|
||||
if ( bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel, *n_panel ) ) \
|
||||
{ \
|
||||
c10 = c10 + diagoffc_i * ( doff_t )cs_c + \
|
||||
-diagoffc_i * ( doff_t )rs_c; \
|
||||
bli_swap_incs( incc10, ldc10 ); \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc10 ); \
|
||||
} \
|
||||
\
|
||||
/* Pack the current panel. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc10, \
|
||||
panel_dim_i, \
|
||||
panel_len, \
|
||||
kappa_cast, \
|
||||
c10, incc10, ldc10, \
|
||||
p_begin, ldp ); \
|
||||
\
|
||||
/*
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: c", panel_len, panel_dim_i, \
|
||||
c_begin, ldc, incc, "%5.2f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: p copied", panel_len, panel_dim_i, \
|
||||
p_begin, ldp, 1, "%5.2f", "" ); \
|
||||
*/ \
|
||||
} \
|
||||
\
|
||||
/* The packed memory region was acquired/allocated with "aligned"
|
||||
dimensions (ie: dimensions that were possibly inflated up to a
|
||||
multiple). When these dimensions are inflated, it creates empty
|
||||
regions along the bottom and/or right edges of the matrix. If
|
||||
either region exists, we set them to zero. This simplifies the
|
||||
register level micro-kernel in that it does not need to support
|
||||
different register blockings for the edge cases. */ \
|
||||
if ( *m_panel != m_panel_max ) \
|
||||
{ \
|
||||
dim_t i = *m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype* p_edge = p_begin + (i )*rs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
if ( *n_panel != n_panel_max ) \
|
||||
{ \
|
||||
dim_t j = *n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype* p_edge = p_begin + (j )*cs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
PASTEMAC(ch,packm_gen_cxk)( strucc, \
|
||||
diagoffc_i, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
*m_panel, \
|
||||
*n_panel, \
|
||||
m_panel_max, \
|
||||
n_panel_max, \
|
||||
kappa, \
|
||||
c_begin, rs_c, cs_c, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/*
|
||||
@@ -474,7 +258,7 @@ void PASTEMAC(ch,varname )( \
|
||||
p_begin, 1, cs_p, "%4.1f", "" ); \
|
||||
if ( cs_p == 1 ) \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: b copied", m_panel_max, n_panel_max, \
|
||||
p_begin, panel_dim, 1, "%8.5f", "" ); \
|
||||
p_begin, rs_p, 1, "%4.1f", "" ); \
|
||||
*/ \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -146,7 +146,6 @@ void PASTEMAC(ch,varname )( \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict c_cast = c; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict c_begin; \
|
||||
ctype* restrict p_begin; \
|
||||
\
|
||||
@@ -155,7 +154,7 @@ void PASTEMAC(ch,varname )( \
|
||||
dim_t it, ic, ip; \
|
||||
dim_t ic0, ip0; \
|
||||
doff_t ic_inc, ip_inc; \
|
||||
dim_t panel_dim; \
|
||||
dim_t panel_dim_max; \
|
||||
dim_t panel_len; \
|
||||
dim_t panel_len_max; \
|
||||
doff_t diagoffc_i; \
|
||||
@@ -165,10 +164,12 @@ void PASTEMAC(ch,varname )( \
|
||||
dim_t panel_len_max_i; \
|
||||
dim_t panel_off_i; \
|
||||
inc_t vs_c; \
|
||||
inc_t incc, ldc; \
|
||||
inc_t ldc; \
|
||||
inc_t ldp, p_inc; \
|
||||
dim_t* m_panel; \
|
||||
dim_t* n_panel; \
|
||||
dim_t* m_panel_full; \
|
||||
dim_t* n_panel_full; \
|
||||
dim_t* m_panel_max; \
|
||||
dim_t* n_panel_max; \
|
||||
conj_t conjc; \
|
||||
\
|
||||
ctype* restrict c_use; \
|
||||
@@ -203,16 +204,17 @@ void PASTEMAC(ch,varname )( \
|
||||
iter_dim = n; \
|
||||
panel_len = m; \
|
||||
panel_len_max = m_max; \
|
||||
panel_dim = pd_p; \
|
||||
incc = cs_c; \
|
||||
panel_dim_max = pd_p; \
|
||||
ldc = rs_c; \
|
||||
vs_c = cs_c; \
|
||||
diagoffc_inc = -( doff_t)panel_dim; \
|
||||
diagoffc_inc = -( doff_t)panel_dim_max; \
|
||||
ldp = rs_p; \
|
||||
m_panel = &m; \
|
||||
n_panel = &panel_dim_i; \
|
||||
m_panel_full = &m; \
|
||||
n_panel_full = &panel_dim_i; \
|
||||
m_panel_use = &panel_len_i; \
|
||||
n_panel_use = &panel_dim_i; \
|
||||
m_panel_max = &panel_len_max_i; \
|
||||
n_panel_max = &panel_dim_max; \
|
||||
} \
|
||||
else /* if ( bli_is_col_stored_f( rs_p, cs_p ) ) */ \
|
||||
{ \
|
||||
@@ -220,35 +222,36 @@ void PASTEMAC(ch,varname )( \
|
||||
iter_dim = m; \
|
||||
panel_len = n; \
|
||||
panel_len_max = n_max; \
|
||||
panel_dim = pd_p; \
|
||||
incc = rs_c; \
|
||||
panel_dim_max = pd_p; \
|
||||
ldc = cs_c; \
|
||||
vs_c = rs_c; \
|
||||
diagoffc_inc = ( doff_t )panel_dim; \
|
||||
diagoffc_inc = ( doff_t )panel_dim_max; \
|
||||
ldp = cs_p; \
|
||||
m_panel = &panel_dim_i; \
|
||||
n_panel = &n; \
|
||||
m_panel_full = &panel_dim_i; \
|
||||
n_panel_full = &n; \
|
||||
m_panel_use = &panel_dim_i; \
|
||||
n_panel_use = &panel_len_i; \
|
||||
m_panel_max = &panel_dim_max; \
|
||||
n_panel_max = &panel_len_max_i; \
|
||||
} \
|
||||
\
|
||||
/* Compute the total number of iterations we'll need. */ \
|
||||
num_iter = iter_dim / panel_dim + ( iter_dim % panel_dim ? 1 : 0 ); \
|
||||
num_iter = iter_dim / panel_dim_max + ( iter_dim % panel_dim_max ? 1 : 0 ); \
|
||||
\
|
||||
/* Set the initial values and increments for indices related to C and P
|
||||
based on whether reverse iteration was requested. */ \
|
||||
if ( ( revifup && bli_is_upper( uploc ) ) || \
|
||||
( reviflo && bli_is_lower( uploc ) ) ) \
|
||||
{ \
|
||||
ic0 = (num_iter - 1) * panel_dim; \
|
||||
ic_inc = -panel_dim; \
|
||||
ic0 = (num_iter - 1) * panel_dim_max; \
|
||||
ic_inc = -panel_dim_max; \
|
||||
ip0 = num_iter - 1; \
|
||||
ip_inc = -1; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
ic0 = 0; \
|
||||
ic_inc = panel_dim; \
|
||||
ic_inc = panel_dim_max; \
|
||||
ip0 = 0; \
|
||||
ip_inc = 1; \
|
||||
} \
|
||||
@@ -258,7 +261,7 @@ void PASTEMAC(ch,varname )( \
|
||||
for ( ic = ic0, ip = ip0, it = 0; it < num_iter; \
|
||||
ic += ic_inc, ip += ip_inc, it += 1 ) \
|
||||
{ \
|
||||
panel_dim_i = bli_min( panel_dim, iter_dim - ic ); \
|
||||
panel_dim_i = bli_min( panel_dim_max, iter_dim - ic ); \
|
||||
\
|
||||
diagoffc_i = diagoffc + (ip )*diagoffc_inc; \
|
||||
c_begin = c_cast + (ic )*vs_c; \
|
||||
@@ -269,11 +272,11 @@ void PASTEMAC(ch,varname )( \
|
||||
triangular), pack only as much as we need (ie: skip over as much
|
||||
as possible on the unstored side of the diagonal).
|
||||
Otherwise, we assume the current panel is full-length. */ \
|
||||
if ( bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel, *n_panel ) ) \
|
||||
if ( bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel_full, *n_panel_full ) ) \
|
||||
{ \
|
||||
continue; \
|
||||
} \
|
||||
else if ( bli_intersects_diag_n( diagoffc_i, *m_panel, *n_panel ) && \
|
||||
else if ( bli_intersects_diag_n( diagoffc_i, *m_panel_full, *n_panel_full ) && \
|
||||
bli_is_triangular( strucc ) ) \
|
||||
{ \
|
||||
/* Sanity check. Diagonals should not intersect the short end of
|
||||
@@ -284,199 +287,73 @@ void PASTEMAC(ch,varname )( \
|
||||
( bli_is_row_stored_f( rs_p, cs_p ) && diagoffc_i > 0 ) ) \
|
||||
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
|
||||
\
|
||||
if ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) \
|
||||
if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) || \
|
||||
( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) ) \
|
||||
{ \
|
||||
panel_off_i = 0; \
|
||||
panel_len_i = bli_min( panel_len, -diagoffc_i + panel_dim_i ); \
|
||||
panel_len_max_i = bli_min( panel_len_max, -diagoffc_i + panel_dim ); \
|
||||
panel_len_i = bli_abs( diagoffc_i ) + panel_dim_i; \
|
||||
panel_len_max_i = bli_abs( diagoffc_i ) + panel_dim_max; \
|
||||
diagoffp = diagoffc_i; \
|
||||
} \
|
||||
else if ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) \
|
||||
else /* if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) || \
|
||||
( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) ) */ \
|
||||
{ \
|
||||
panel_off_i = bli_abs( bli_min( diagoffc_i, 0 ) ); \
|
||||
panel_off_i = bli_abs( diagoffc_i ); \
|
||||
panel_len_i = panel_len - panel_off_i; \
|
||||
panel_len_max_i = panel_len_max - panel_off_i; \
|
||||
diagoffp = diagoffc_i + panel_off_i; \
|
||||
} \
|
||||
else if ( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) \
|
||||
{ \
|
||||
panel_off_i = bli_max( diagoffc_i, 0 ); \
|
||||
panel_len_i = panel_len - panel_off_i; \
|
||||
panel_len_max_i = panel_len_max - panel_off_i; \
|
||||
diagoffp = diagoffc_i - panel_off_i; \
|
||||
} \
|
||||
else /* if ( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) */ \
|
||||
{ \
|
||||
panel_off_i = 0; \
|
||||
panel_len_i = bli_min( panel_len, diagoffc_i + panel_dim_i ); \
|
||||
panel_len_max_i = bli_min( panel_len_max, diagoffc_i + panel_dim ); \
|
||||
diagoffp = diagoffc_i; \
|
||||
diagoffp = 0; \
|
||||
} \
|
||||
\
|
||||
/* Adjust the pointer to the beginning of the panel in C based on
|
||||
the offset determined above. */ \
|
||||
c_use = c_begin + (panel_off_i )*ldc; \
|
||||
p_use = p_begin; \
|
||||
\
|
||||
/* Pack the panel. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc, \
|
||||
panel_dim_i, \
|
||||
panel_len_i, \
|
||||
kappa_cast, \
|
||||
c_use, incc, ldc, \
|
||||
p_use, ldp ); \
|
||||
PASTEMAC(ch,packm_tri_cxk)( strucc, \
|
||||
diagoffp, \
|
||||
diagc, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
invdiag, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
*m_panel_max, \
|
||||
*n_panel_max, \
|
||||
kappa_cast, \
|
||||
c_use, rs_c, cs_c, \
|
||||
p_use, rs_p, cs_p ); \
|
||||
\
|
||||
/* If the diagonal of C is implicitly unit, set the diagonal of
|
||||
the packed panel to unit. */ \
|
||||
if ( bli_is_unit_diag( diagc ) ) \
|
||||
{ \
|
||||
PASTEMAC2(ch,ch,setd_unb_var1)( diagoffp, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
kappa_cast, \
|
||||
p_use, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* If requested, invert the diagonal of the packed panel. */ \
|
||||
if ( invdiag == TRUE ) \
|
||||
{ \
|
||||
PASTEMAC(ch,invertd_unb_var1)( diagoffp, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
p_use, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* Always densify the unstored part of the packed panel. */ \
|
||||
{ \
|
||||
uplo_t uplop = uploc; \
|
||||
\
|
||||
/* For triangular matrices, we wish to reference the region
|
||||
strictly opposite the diagonal of C. This amounts to
|
||||
toggling uploc and then shifting the diagonal offset to
|
||||
shrink the stored region (by one diagonal). */ \
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
|
||||
\
|
||||
/* Set the region opposite the diagonal of P to zero. */ \
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( diagoffp, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
uplop, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
zero, \
|
||||
p_use, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
p_inc = ldp * panel_len_max_i; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
c_use = c_begin; \
|
||||
p_use = p_begin; \
|
||||
\
|
||||
/* Pack a full-length panel. */ \
|
||||
panel_off_i = 0; \
|
||||
panel_len_i = panel_len; \
|
||||
panel_len_max_i = panel_len_max; \
|
||||
\
|
||||
/* Pack the panel. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc, \
|
||||
panel_dim_i, \
|
||||
panel_len_i, \
|
||||
kappa_cast, \
|
||||
c_use, incc, ldc, \
|
||||
p_use, ldp ); \
|
||||
PASTEMAC(ch,packm_gen_cxk)( BLIS_GENERAL, \
|
||||
0, \
|
||||
BLIS_DENSE, \
|
||||
conjc, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
*m_panel_max, \
|
||||
*n_panel_max, \
|
||||
kappa, \
|
||||
c_begin, rs_c, cs_c, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
\
|
||||
p_inc = ldp * panel_len_max_i; \
|
||||
} \
|
||||
\
|
||||
/* If necessary, zero-pad at the edge of the panel dimension (ie: along
|
||||
the long dimension of the panel). */ \
|
||||
if ( panel_dim_i != panel_dim ) \
|
||||
{ \
|
||||
/* Note that this code does the right thing for both row and
|
||||
column panels, since an m x n column-stored row panel and an
|
||||
n x m row-stored column panel look the same in memory. */ \
|
||||
dim_t i = panel_dim_i; \
|
||||
dim_t m_edge = panel_dim - i; \
|
||||
dim_t n_edge = panel_len_max_i; \
|
||||
inc_t rs_pe = 1; \
|
||||
inc_t cs_pe = ldp; \
|
||||
ctype* p_edge = p_begin + (i )*rs_pe; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_pe, cs_pe ); \
|
||||
} \
|
||||
\
|
||||
/* If necessary, zero-pad at the far end of the panel (ie: at the
|
||||
other side of the long dimension of the panel). */ \
|
||||
if ( panel_len_i != panel_len_max_i ) \
|
||||
{ \
|
||||
/* Note that this code does the right thing for both row and
|
||||
column panels, since an m x n column-stored row panel and an
|
||||
n x m row-stored column panel look the same in memory. */ \
|
||||
/* Note that we set m_edge as panel_dim, and not panel_dim_i;
|
||||
this is so that we can simultaneously zero out the corner
|
||||
region (if it exists). */ \
|
||||
dim_t j = panel_len_i; \
|
||||
dim_t m_edge = panel_dim; \
|
||||
dim_t n_edge = panel_len_max_i - j; \
|
||||
inc_t rs_pe = 1; \
|
||||
inc_t cs_pe = ldp; \
|
||||
ctype* p_edge = p_begin + (j )*cs_pe; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_pe, cs_pe ); \
|
||||
} \
|
||||
\
|
||||
/* If this panel is an edge case in both panel dimension and length,
|
||||
then it must be a bottom-right corner case. Set the part of the
|
||||
diagonal that extends into the zero-padded region to identity. */ \
|
||||
if ( panel_dim_i != panel_dim && \
|
||||
panel_len_i != panel_len_max_i ) \
|
||||
{ \
|
||||
/* Note that this code does the right thing for both row and
|
||||
column panels, since an m x n column-stored row panel and an
|
||||
n x m row-stored column panel look the same in memory. */ \
|
||||
dim_t i = panel_dim_i; \
|
||||
dim_t j = panel_len_i; \
|
||||
dim_t m_br = panel_dim - i; \
|
||||
dim_t n_br = panel_len_max_i - j; \
|
||||
inc_t rs_pe = 1; \
|
||||
inc_t cs_pe = ldp; \
|
||||
ctype* one = PASTEMAC(ch,1); \
|
||||
ctype* p_edge = p_begin + (i )*rs_pe + (j )*cs_pe; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setd_unb_var1)( 0, \
|
||||
m_br, \
|
||||
n_br, \
|
||||
one, \
|
||||
p_edge, rs_pe, cs_pe ); \
|
||||
\
|
||||
/*
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: setting br unit diag", m_br, n_br, \
|
||||
p_edge, rs_pe, cs_pe, "%5.2f", "" ); \
|
||||
*/ \
|
||||
} \
|
||||
\
|
||||
/*
|
||||
if ( rs_p == 1 ) \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: ap copied", panel_dim, panel_len_max_i, \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: ap copied", panel_dim_max, panel_len_max_i, \
|
||||
p_begin, rs_p, cs_p, "%4.1f", "" ); \
|
||||
if ( cs_p == 1 ) \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: bp copied", panel_len_max_i, panel_dim, \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: bp copied", panel_len_max_i, panel_dim_max, \
|
||||
p_begin, rs_p, cs_p, "%4.1f", "" ); \
|
||||
*/ \
|
||||
\
|
||||
\
|
||||
p_begin += p_inc; \
|
||||
} \
|
||||
|
||||
@@ -52,7 +52,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* p, inc_t ldp \
|
||||
void* p, inc_t ldp \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_cxk )
|
||||
|
||||
154
frame/1m/packm/bli_packm_gen_cxk.c
Normal file
154
frame/1m/packm/bli_packm_gen_cxk.c
Normal file
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
struc_t strucc, \
|
||||
doff_t diagoffc, \
|
||||
uplo_t uploc, \
|
||||
conj_t conjc, \
|
||||
dim_t m_panel, \
|
||||
dim_t n_panel, \
|
||||
dim_t m_panel_max, \
|
||||
dim_t n_panel_max, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict c_begin = c; \
|
||||
ctype* restrict p_begin = p; \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
\
|
||||
dim_t panel_dim; \
|
||||
dim_t panel_len; \
|
||||
inc_t incc, ldc; \
|
||||
inc_t ldp; \
|
||||
\
|
||||
\
|
||||
/* If the strides of p indicate row storage, then we are packing to
|
||||
column panels; otherwise, if the strides indicate column storage,
|
||||
we are packing to row panels. */ \
|
||||
if ( bli_is_row_stored_f( rs_p, cs_p ) ) \
|
||||
{ \
|
||||
/* Prepare to pack to row-stored column panel. */ \
|
||||
panel_dim = n_panel; \
|
||||
panel_len = m_panel; \
|
||||
incc = cs_c; \
|
||||
ldc = rs_c; \
|
||||
ldp = rs_p; \
|
||||
} \
|
||||
else /* if ( bli_is_col_stored_f( rs_p, cs_p ) ) */ \
|
||||
{ \
|
||||
/* Prepare to pack to column-stored row panel. */ \
|
||||
panel_dim = m_panel; \
|
||||
panel_len = n_panel; \
|
||||
incc = rs_c; \
|
||||
ldc = cs_c; \
|
||||
ldp = cs_p; \
|
||||
} \
|
||||
\
|
||||
/* If the current panel is unstored, we need to make a few
|
||||
adjustments so we refer to the data where it is actually
|
||||
stored, also taking conjugation into account. (Note this
|
||||
implicitly assumes we are operating on a dense panel
|
||||
within a larger symmetric or Hermitian matrix, since a
|
||||
general matrix would not contain any unstored region.) */ \
|
||||
if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \
|
||||
{ \
|
||||
c_begin = c_begin + diagoffc * ( doff_t )cs_c + \
|
||||
-diagoffc * ( doff_t )rs_c; \
|
||||
bli_swap_incs( incc, ldc ); \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc ); \
|
||||
} \
|
||||
\
|
||||
/* Pack the panel. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa_cast, \
|
||||
c_begin, incc, ldc, \
|
||||
p_begin, ldp ); \
|
||||
\
|
||||
\
|
||||
/* The packed memory region was acquired/allocated with "aligned"
|
||||
dimensions (ie: dimensions that were possibly inflated up to a
|
||||
multiple). When these dimension are inflated, it creates empty
|
||||
regions along the bottom and/or right edges of the matrix. If
|
||||
either region exists, we set them to zero. This allows the
|
||||
micro-kernel to remain simple since it does not need to support
|
||||
different register blockings for the edge cases. */ \
|
||||
if ( m_panel != m_panel_max ) \
|
||||
{ \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype* p_edge = p_begin + (i )*rs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
if ( n_panel != n_panel_max ) \
|
||||
{ \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype* p_edge = p_begin + (j )*cs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( packm_gen_cxk )
|
||||
|
||||
53
frame/1m/packm/bli_packm_gen_cxk.h
Normal file
53
frame/1m/packm/bli_packm_gen_cxk.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
struc_t strucc, \
|
||||
doff_t diagoffc, \
|
||||
uplo_t uploc, \
|
||||
conj_t conjc, \
|
||||
dim_t m_panel, \
|
||||
dim_t n_panel, \
|
||||
dim_t m_panel_max, \
|
||||
dim_t n_panel_max, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_gen_cxk )
|
||||
|
||||
274
frame/1m/packm/bli_packm_herm_cxk.c
Normal file
274
frame/1m/packm/bli_packm_herm_cxk.c
Normal file
@@ -0,0 +1,274 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
struc_t strucc, \
|
||||
doff_t diagoffc, \
|
||||
uplo_t uploc, \
|
||||
conj_t conjc, \
|
||||
dim_t m_panel, \
|
||||
dim_t n_panel, \
|
||||
dim_t m_panel_max, \
|
||||
dim_t n_panel_max, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict c_begin = c; \
|
||||
ctype* restrict p_begin = p; \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
\
|
||||
dim_t i, j; \
|
||||
dim_t panel_len; \
|
||||
doff_t diagoffc_abs; \
|
||||
dim_t panel_dim; \
|
||||
inc_t incc, ldc; \
|
||||
inc_t ldp; \
|
||||
\
|
||||
ctype* restrict c10; \
|
||||
ctype* restrict p10; \
|
||||
dim_t p10_dim, p10_len; \
|
||||
inc_t incc10, ldc10; \
|
||||
doff_t diagoffc10; \
|
||||
conj_t conjc10; \
|
||||
\
|
||||
ctype* restrict c12; \
|
||||
ctype* restrict p12; \
|
||||
dim_t p12_dim, p12_len; \
|
||||
inc_t incc12, ldc12; \
|
||||
doff_t diagoffc12; \
|
||||
conj_t conjc12; \
|
||||
\
|
||||
ctype* restrict c11; \
|
||||
ctype* restrict p11; \
|
||||
dim_t p11_m; \
|
||||
dim_t p11_n; \
|
||||
inc_t rs_p11, cs_p11; \
|
||||
\
|
||||
\
|
||||
/* If the strides of p indicate row storage, then we are packing to
|
||||
column panels; otherwise, if the strides indicate column storage,
|
||||
we are packing to row panels. */ \
|
||||
if ( bli_is_row_stored_f( rs_p, cs_p ) ) \
|
||||
{ \
|
||||
/* Prepare to pack to row-stored column panel. */ \
|
||||
panel_len = m_panel; \
|
||||
panel_dim = n_panel; \
|
||||
incc = cs_c; \
|
||||
ldc = rs_c; \
|
||||
ldp = rs_p; \
|
||||
rs_p11 = rs_p; \
|
||||
cs_p11 = 1; \
|
||||
} \
|
||||
else /* if ( bli_is_col_stored_f( rs_p, cs_p ) ) */ \
|
||||
{ \
|
||||
/* Prepare to pack to column-stored row panel. */ \
|
||||
panel_len = n_panel; \
|
||||
panel_dim = m_panel; \
|
||||
incc = rs_c; \
|
||||
ldc = cs_c; \
|
||||
ldp = cs_p; \
|
||||
rs_p11 = 1; \
|
||||
cs_p11 = cs_p; \
|
||||
} \
|
||||
\
|
||||
diagoffc_abs = bli_abs( diagoffc ); \
|
||||
\
|
||||
/* Sanity check. Diagonals should not intersect the short end of
|
||||
a micro-panel. If they do, then somehow the constraints on
|
||||
cache blocksizes being a whole multiple of the register
|
||||
blocksizes was somehow violated. */ \
|
||||
if ( ( bli_is_col_stored_f( rs_p, cs_p ) && diagoffc < 0 ) || \
|
||||
( bli_is_row_stored_f( rs_p, cs_p ) && diagoffc > 0 ) ) \
|
||||
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
|
||||
\
|
||||
if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) || \
|
||||
( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) ) \
|
||||
{ \
|
||||
p10_dim = panel_dim; \
|
||||
p10_len = diagoffc_abs; \
|
||||
p10 = p_begin; \
|
||||
c10 = c_begin; \
|
||||
incc10 = incc; \
|
||||
ldc10 = ldc; \
|
||||
conjc10 = conjc; \
|
||||
\
|
||||
p12_dim = panel_dim; \
|
||||
p12_len = panel_len - p10_len; \
|
||||
j = p10_len; \
|
||||
diagoffc12 = diagoffc_abs - j; \
|
||||
p12 = p_begin + (j )*ldp; \
|
||||
c12 = c_begin + (j )*ldc; \
|
||||
c12 = c12 + diagoffc12 * ( doff_t )cs_c + \
|
||||
-diagoffc12 * ( doff_t )rs_c; \
|
||||
incc12 = ldc; \
|
||||
ldc12 = incc; \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
p11_m = panel_dim; \
|
||||
p11_n = panel_dim; \
|
||||
j = diagoffc_abs; \
|
||||
p11 = p_begin + (j )*ldp; \
|
||||
c11 = c_begin + (j )*ldc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc12 ); \
|
||||
} \
|
||||
else /* if ( ( bli_is_row_stored_f( rs_p, cs_p ) && bli_is_lower( uploc ) ) || \
|
||||
( bli_is_col_stored_f( rs_p, cs_p ) && bli_is_upper( uploc ) ) ) */ \
|
||||
{ \
|
||||
p10_dim = panel_dim; \
|
||||
p10_len = diagoffc_abs + panel_dim; \
|
||||
diagoffc10 = diagoffc; \
|
||||
p10 = p_begin; \
|
||||
c10 = c_begin; \
|
||||
c10 = c10 + diagoffc10 * ( doff_t )cs_c + \
|
||||
-diagoffc10 * ( doff_t )rs_c; \
|
||||
incc10 = ldc; \
|
||||
ldc10 = incc; \
|
||||
conjc10 = conjc; \
|
||||
\
|
||||
p12_dim = panel_dim; \
|
||||
p12_len = panel_len - p10_len; \
|
||||
j = p10_len; \
|
||||
p12 = p_begin + (j )*ldp; \
|
||||
c12 = c_begin + (j )*ldc; \
|
||||
incc12 = incc; \
|
||||
ldc12 = ldc; \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
p11_m = panel_dim; \
|
||||
p11_n = panel_dim; \
|
||||
j = diagoffc_abs; \
|
||||
p11 = p_begin + (j )*ldp; \
|
||||
c11 = c_begin + (j )*ldc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc10 ); \
|
||||
} \
|
||||
\
|
||||
/* Pack to P10. For upper storage, this includes the unstored
|
||||
triangle of C11. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc10, \
|
||||
p10_dim, \
|
||||
p10_len, \
|
||||
kappa_cast, \
|
||||
c10, incc10, ldc10, \
|
||||
p10, ldp ); \
|
||||
\
|
||||
/* Pack to P12. For lower storage, this includes the unstored
|
||||
triangle of C11. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc12, \
|
||||
p12_dim, \
|
||||
p12_len, \
|
||||
kappa_cast, \
|
||||
c12, incc12, ldc12, \
|
||||
p12, ldp ); \
|
||||
\
|
||||
/* Pack the stored triangule of C11 to P11. */ \
|
||||
PASTEMAC3(ch,ch,ch,scal2m_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
uploc, \
|
||||
conjc, \
|
||||
p11_m, \
|
||||
p11_n, \
|
||||
kappa_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
p11, rs_p11, cs_p11 ); \
|
||||
\
|
||||
/* If source matrix C is Hermitian, we have to zero out the
|
||||
imaginary components of the diagonal of P11 in case the
|
||||
corresponding elements in C11 were not already zero. */ \
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
{ \
|
||||
/* NOTE: We can directly increment p11 since we are done
|
||||
using p11 for the remainder of the function. */ \
|
||||
for ( i = 0; i < p11_m; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,seti0s)( *p11 ); \
|
||||
\
|
||||
p11 += rs_p11 + cs_p11; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
/* The packed memory region was acquired/allocated with "aligned"
|
||||
dimensions (ie: dimensions that were possibly inflated up to a
|
||||
multiple). When these dimension are inflated, it creates empty
|
||||
regions along the bottom and/or right edges of the matrix. If
|
||||
either region exists, we set them to zero. This allows the
|
||||
micro-kernel to remain simple since it does not need to support
|
||||
different register blockings for the edge cases. */ \
|
||||
if ( m_panel != m_panel_max ) \
|
||||
{ \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype* p_edge = p_begin + (i )*rs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
if ( n_panel != n_panel_max ) \
|
||||
{ \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype* p_edge = p_begin + (j )*cs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( packm_herm_cxk )
|
||||
|
||||
53
frame/1m/packm/bli_packm_herm_cxk.h
Normal file
53
frame/1m/packm/bli_packm_herm_cxk.h
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
struc_t strucc, \
|
||||
doff_t diagoffc, \
|
||||
uplo_t uploc, \
|
||||
conj_t conjc, \
|
||||
dim_t m_panel, \
|
||||
dim_t n_panel, \
|
||||
dim_t m_panel_max, \
|
||||
dim_t n_panel_max, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_herm_cxk )
|
||||
|
||||
210
frame/1m/packm/bli_packm_tri_cxk.c
Normal file
210
frame/1m/packm/bli_packm_tri_cxk.c
Normal file
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
struc_t strucc, \
|
||||
doff_t diagoffp, \
|
||||
diag_t diagc, \
|
||||
uplo_t uploc, \
|
||||
conj_t conjc, \
|
||||
bool_t invdiag, \
|
||||
dim_t m_panel, \
|
||||
dim_t n_panel, \
|
||||
dim_t m_panel_max, \
|
||||
dim_t n_panel_max, \
|
||||
ctype* kappa, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* p, inc_t rs_p, inc_t cs_p \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict c_begin = c; \
|
||||
ctype* restrict p_begin = p; \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
\
|
||||
dim_t panel_dim; \
|
||||
dim_t panel_len; \
|
||||
inc_t incc, ldc; \
|
||||
inc_t ldp; \
|
||||
\
|
||||
\
|
||||
/* If the strides of p indicate row storage, then we are packing to
|
||||
column panels; otherwise, if the strides indicate column storage,
|
||||
we are packing to row panels. */ \
|
||||
if ( bli_is_row_stored_f( rs_p, cs_p ) ) \
|
||||
{ \
|
||||
/* Prepare to pack to row-stored column panel. */ \
|
||||
panel_dim = n_panel; \
|
||||
panel_len = m_panel; \
|
||||
incc = cs_c; \
|
||||
ldc = rs_c; \
|
||||
ldp = rs_p; \
|
||||
} \
|
||||
else /* if ( bli_is_col_stored_f( rs_p, cs_p ) ) */ \
|
||||
{ \
|
||||
/* Prepare to pack to column-stored row panel. */ \
|
||||
panel_dim = m_panel; \
|
||||
panel_len = n_panel; \
|
||||
incc = rs_c; \
|
||||
ldc = cs_c; \
|
||||
ldp = cs_p; \
|
||||
} \
|
||||
\
|
||||
/* Pack the panel. */ \
|
||||
PASTEMAC(ch,packm_cxk)( conjc, \
|
||||
panel_dim, \
|
||||
panel_len, \
|
||||
kappa_cast, \
|
||||
c_begin, incc, ldc, \
|
||||
p_begin, ldp ); \
|
||||
\
|
||||
/* If the diagonal of C is implicitly unit, set the diagonal of
|
||||
the packed panel to unit. */ \
|
||||
if ( bli_is_unit_diag( diagc ) ) \
|
||||
{ \
|
||||
PASTEMAC2(ch,ch,setd_unb_var1)( diagoffp, \
|
||||
m_panel, \
|
||||
n_panel, \
|
||||
kappa_cast, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* If requested, invert the diagonal of the packed panel. */ \
|
||||
if ( invdiag == TRUE ) \
|
||||
{ \
|
||||
PASTEMAC(ch,invertd_unb_var1)( diagoffp, \
|
||||
m_panel, \
|
||||
n_panel, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* Set the region opposite the diagonal of P to zero. To do this,
|
||||
we need to reference the "unstored" region on the other side of
|
||||
the diagonal. This amounts to toggling uploc and then shifting
|
||||
the diagonal offset to shrink the newly referenced region (by
|
||||
one diagonal). */ \
|
||||
{ \
|
||||
uplo_t uplop = uploc; \
|
||||
\
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( diagoffp, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
uplop, \
|
||||
m_panel, \
|
||||
n_panel, \
|
||||
zero, \
|
||||
p_begin, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* The packed memory region was acquired/allocated with "aligned"
|
||||
dimensions (ie: dimensions that were possibly inflated up to a
|
||||
multiple). When these dimension are inflated, it creates empty
|
||||
regions along the bottom and/or right edges of the matrix. If
|
||||
either region exists, we set them to zero. This allows the
|
||||
micro-kernel to remain simple since it does not need to support
|
||||
different register blockings for the edge cases. */ \
|
||||
if ( m_panel != m_panel_max ) \
|
||||
{ \
|
||||
dim_t i = m_panel; \
|
||||
dim_t m_edge = m_panel_max - i; \
|
||||
dim_t n_edge = n_panel_max; \
|
||||
ctype* p_edge = p_begin + (i )*rs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
if ( n_panel != n_panel_max ) \
|
||||
{ \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_edge = m_panel_max; \
|
||||
dim_t n_edge = n_panel_max - j; \
|
||||
ctype* p_edge = p_begin + (j )*cs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setm_unb_var1)( 0, \
|
||||
BLIS_NONUNIT_DIAG, \
|
||||
BLIS_DENSE, \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
zero, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
/* If this panel is an edge case in both panel dimension and length,
|
||||
then it must be a bottom-right corner case. Set the part of the
|
||||
diagonal that extends into the zero-padded region to identity. */ \
|
||||
if ( m_panel != m_panel_max && \
|
||||
n_panel != n_panel_max ) \
|
||||
{ \
|
||||
dim_t i = m_panel; \
|
||||
dim_t j = n_panel; \
|
||||
dim_t m_br = m_panel_max - i; \
|
||||
dim_t n_br = n_panel_max - j; \
|
||||
ctype* one = PASTEMAC(ch,1); \
|
||||
ctype* p_edge = p_begin + (i )*rs_p + (j )*cs_p; \
|
||||
\
|
||||
PASTEMAC2(ch,ch,setd_unb_var1)( 0, \
|
||||
m_br, \
|
||||
n_br, \
|
||||
one, \
|
||||
p_edge, rs_p, cs_p ); \
|
||||
/*
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: setting br unit diag", m_br, n_br, \
|
||||
p_edge, rs_p, cs_p, "%4.1f", "" ); \
|
||||
*/ \
|
||||
} \
|
||||
/*
|
||||
if ( rs_p == 1 ) \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: ap copied", m_panel_max, n_panel_max, \
|
||||
p_begin, rs_p, cs_p, "%4.1f", "" ); \
|
||||
if ( cs_p == 1 ) \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: bp copied", m_panel_max, n_panel_max, \
|
||||
p_begin, rs_p, cs_p, "%4.1f", "" ); \
|
||||
*/ \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( packm_tri_cxk )
|
||||
|
||||
55
frame/1m/packm/bli_packm_tri_cxk.h
Normal file
55
frame/1m/packm/bli_packm_tri_cxk.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
struc_t strucc, \
|
||||
doff_t diagoffc, \
|
||||
diag_t diagc, \
|
||||
uplo_t uploc, \
|
||||
conj_t conjc, \
|
||||
bool_t invdiag, \
|
||||
dim_t m_panel, \
|
||||
dim_t n_panel, \
|
||||
dim_t m_panel_max, \
|
||||
dim_t n_panel_max, \
|
||||
ctype* kappa, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* p, inc_t rs_p, inc_t cs_p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_tri_cxk )
|
||||
|
||||
Reference in New Issue
Block a user