Files
blis/frame/include/bli_param_macro_defs.h
Field G. Van Zee 5e54f46ccb Added template implementations and other tweaks.
Details:
- Added a 'template' configuration, which contains stub implementations of the
  level 1, 1f, and 3 kernels with one datatype implemented in C for each, with
  lots of in-file comments and documentation.
- Modified some variable/parameter names for some 1/1f operations. (e.g.
  renaming vector length parameter from m to n.)
- Moved level-1f fusing factors from axpyf, dotxf, and dotxaxpyf header files
  to bli_kernel.h.
- Modifed test suite to print out fusing factors for axpyf, dotxf, and
  dotxaxpyf, as well as the default fusing factor (which are all equal
  in the reference and template implementations).
- Cleaned up some sloppiness in the level-1f unb_var1.c files whereby these
  reference variants were implemented in terms of front-end routines rather
  that directly in terms of the kernels. (For example, axpy2v was implemented
  as two calls to axpyv rather than two calls to AXPYV_KERNEL.)
- Changed the interface to dotxf so that it matches that of axpyf, in that
  A is assumed to be m x b_n in both cases, and for dotxf A is actually used
  as A^T.
- Minor variable naming and comment changes to reference micro-kernels in
  frame/3/gemm/ukernels and frame/3/trsm/ukernels.
2013-09-30 12:58:18 -05:00

792 lines
19 KiB
C

/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2013, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_PARAM_MACRO_DEFS_H
#define BLIS_PARAM_MACRO_DEFS_H
// -- Parameter query macros --
// buffer
#define bli_is_aligned_to( p, size ) \
\
( ( siz_t )(p) % (size) == 0 )
#define bli_is_unaligned_to( p, size ) \
\
( ( siz_t )(p) % (size) != 0 )
#define bli_offset_from_alignment( p, size ) \
\
( ( siz_t )(p) % (size) )
// datatype
#define bli_is_float( dt ) \
\
( dt == BLIS_FLOAT )
#define bli_is_double( dt ) \
\
( dt == BLIS_DOUBLE )
#define bli_is_scomplex( dt ) \
\
( dt == BLIS_SCOMPLEX )
#define bli_is_dcomplex( dt ) \
\
( dt == BLIS_DCOMPLEX )
#define bli_is_constant( dt ) \
\
( dt == BLIS_CONSTANT )
#define bli_is_int( dt ) \
\
( dt == BLIS_INT )
#define bli_is_real( dt ) \
\
( bli_is_float( dt ) || \
bli_is_double( dt ) )
#define bli_is_complex( dt ) \
\
( bli_is_scomplex( dt ) || \
bli_is_dcomplex( dt ) )
#define bli_is_single_prec( dt ) \
\
( bli_is_float( dt ) || \
bli_is_scomplex( dt ) )
#define bli_is_double_prec( dt ) \
\
( bli_is_double( dt ) || \
bli_is_dcomplex( dt ) )
#define bli_datatype_proj_to_real( dt ) \
\
( dt & ~BLIS_BITVAL_COMPLEX )
#define bli_datatype_proj_to_complex( dt ) \
\
( dt & BLIS_BITVAL_COMPLEX )
#define bli_domain_of_dt( dt ) \
\
( dt & BLIS_DOMAIN_BIT )
// side
#define bli_is_left( side ) \
\
( side == BLIS_LEFT )
#define bli_is_right( side ) \
\
( side == BLIS_RIGHT )
#define bli_side_toggled( side ) \
\
( bli_is_left( side ) ? BLIS_RIGHT : BLIS_LEFT )
#define bli_toggle_side( side ) \
{ \
side = bli_side_toggled( side ); \
}
// uplo
#define bli_is_lower( uplo ) \
\
( uplo == BLIS_LOWER )
#define bli_is_upper( uplo ) \
\
( uplo == BLIS_UPPER )
#define bli_is_upper_or_lower( uplo ) \
\
( bli_is_upper( uplo ) || bli_is_lower( uplo ) )
#define bli_is_dense( uplo ) \
\
( uplo == BLIS_DENSE )
#define bli_is_zeros( uplo ) \
\
( uplo == BLIS_ZEROS )
#define bli_uplo_toggled( uplo ) \
\
( bli_is_upper_or_lower( uplo ) ? \
( ( uplo ^ BLIS_LOWER_BIT ) ^ BLIS_UPPER_BIT ) : uplo \
)
#define bli_toggle_uplo( uplo ) \
{ \
uplo = bli_uplo_toggled( uplo ); \
}
#define bli_set_uplo_with_trans( trans, uplo, uplo_trans ) \
{ \
if ( bli_does_notrans( trans ) ) { uplo_trans = uplo; } \
else { uplo_trans = bli_uplo_toggled( uplo ); } \
}
// structure
#define bli_is_general( struc ) \
\
( struc == BLIS_GENERAL )
#define bli_is_hermitian( struc ) \
\
( struc == BLIS_HERMITIAN )
#define bli_is_symmetric( struc ) \
\
( struc == BLIS_SYMMETRIC )
#define bli_is_triangular( struc ) \
\
( struc == BLIS_TRIANGULAR )
// conj
#define bli_is_noconj( conj ) \
\
( conj == BLIS_NO_CONJUGATE )
#define bli_is_conj( conj ) \
\
( conj == BLIS_CONJUGATE )
#define bli_conj_toggled( conj ) \
\
( conj ^ BLIS_CONJ_BIT )
#define bli_apply_conj( conjapp, conj )\
\
( conj ^ (conjapp) )
#define bli_toggle_conj( conj ) \
{ \
conj = bli_conj_toggled( conj ); \
}
// trans
#define bli_is_notrans( trans ) \
\
( trans == BLIS_NO_TRANSPOSE )
#define bli_is_trans( trans ) \
\
( trans == BLIS_TRANSPOSE )
#define bli_is_conjnotrans( trans ) \
\
( trans == BLIS_CONJ_NO_TRANSPOSE )
#define bli_is_conjtrans( trans ) \
\
( trans == BLIS_CONJ_TRANSPOSE )
#define bli_does_notrans( trans ) \
\
( (~(trans) & BLIS_TRANS_BIT ) == BLIS_BITVAL_TRANS )
#define bli_does_trans( trans ) \
\
( ( trans & BLIS_TRANS_BIT ) == BLIS_BITVAL_TRANS )
#define bli_does_noconj( trans ) \
\
( (~(trans) & BLIS_CONJ_BIT ) == BLIS_BITVAL_CONJ )
#define bli_does_conj( trans ) \
\
( ( trans & BLIS_CONJ_BIT ) == BLIS_BITVAL_CONJ )
#define bli_extract_trans( trans ) \
\
( trans & BLIS_TRANS_BIT )
#define bli_extract_conj( trans ) \
\
( trans & BLIS_CONJ_BIT )
#define bli_trans_toggled( trans ) \
\
( trans ^ BLIS_TRANS_BIT )
#define bli_toggle_trans( trans ) \
{ \
trans = bli_trans_toggled( trans ); \
}
// diag
#define bli_is_nonunit_diag( diag ) \
\
( diag == BLIS_NONUNIT_DIAG )
#define bli_is_unit_diag( diag ) \
\
( diag == BLIS_UNIT_DIAG )
// dimension-related
#define bli_zero_dim1( n ) \
\
( (n) == 0 )
#define bli_zero_dim2( m, n ) \
\
( (m) == 0 || (n) == 0 )
#define bli_zero_dim3( m, n, k ) \
\
( (m) == 0 || (n) == 0 || (k) == 0 )
#define bli_nonzero_dim( n ) \
\
( (n) > 0 )
#define bli_vector_dim( m, n ) \
\
( (m) == 1 ? (n) : (m) )
#define bli_is_vector( m, n ) \
\
( (m) == 1 || (n) == 1 )
#define bli_is_row_vector( m, n ) \
\
( (m) == 1 )
#define bli_is_col_vector( m, n ) \
\
( (n) == 1 )
#define bli_set_dim_with_side( side, m, n, dim ) \
{ \
if ( bli_is_left( side ) ) { dim = m; } \
else { dim = n; } \
}
#define bli_set_dims_with_trans( trans, m, n, mtrans, ntrans ) \
{ \
if ( bli_does_notrans( trans ) ) { mtrans = m; ntrans = n; } \
else { mtrans = n; ntrans = m; } \
}
#define bli_set_dims_incs_with_trans( trans, m, n, rs, cs, mt, nt, rst, cst ) \
{ \
if ( bli_does_notrans( trans ) ) { mt = m; nt = n; rst = rs; cst = cs; } \
else { mt = n; nt = m; rst = cs; cst = rs; } \
}
// blocksize-related
#define bli_determine_blocksize_dim_f( i, dim, b_alg ) \
\
( bli_min( b_alg, dim - i ) )
#define bli_determine_blocksize_dim_b( i, dim, b_alg ) \
\
( i == 0 && dim % b_alg != 0 ? dim % b_alg \
: b_alg )
// stride-related
#define bli_vector_inc( trans, m, n, rs, cs ) \
\
( bli_does_notrans( trans ) ? ( m == 1 ? (cs) : (rs) ) \
: ( m == 1 ? (rs) : (cs) ) )
#define bli_is_row_stored( rs, cs ) \
\
( cs == 1 )
#define bli_is_col_stored( rs, cs ) \
\
( rs == 1 )
#define bli_is_gen_stored( rs, cs ) \
\
( rs != 1 && cs != 1 )
#define bli_is_row_tilted( rs, cs ) \
\
( cs < rs )
#define bli_is_col_tilted( rs, cs ) \
\
( rs < cs )
#define bli_has_nonunit_inc1( inc1 ) \
\
( inc1 != 1 )
#define bli_has_nonunit_inc2( inc1, inc2 ) \
\
( inc1 != 1 || inc2 != 1 )
#define bli_has_nonunit_inc3( inc1, inc2, inc3 ) \
\
( inc1 != 1 || inc2 != 1 || inc3 != 1 )
// diag offset-related
#define bli_negate_diag_offset( diagoff ) \
{ \
diagoff = -diagoff; \
}
#define bli_shift_diag_offset_to_grow_uplo( uplo, diagoff ) \
{ \
if ( bli_is_upper( uplo ) ) diagoff -= 1; \
else if ( bli_is_lower( uplo ) ) diagoff += 1; \
}
#define bli_shift_diag_offset_to_shrink_uplo( uplo, diagoff ) \
{ \
if ( bli_is_upper( uplo ) ) diagoff += 1; \
else if ( bli_is_lower( uplo ) ) diagoff -= 1; \
}
#define bli_diag_offset_with_trans( trans, diagoff ) \
\
( bli_does_trans( trans ) ? -diagoff : diagoff )
#define bli_is_strictly_above_diag( diagoff, trans, m, n ) \
\
( bli_does_trans( trans ) ? ( ( doff_t )n <= -diagoff ) \
: ( ( doff_t )m <= -diagoff ) )
#define bli_is_strictly_below_diag( diagoff, trans, m, n ) \
\
( bli_does_trans( trans ) ? ( ( doff_t )m <= diagoff ) \
: ( ( doff_t )n <= diagoff ) )
#define bli_is_outside_diag( diagoff, trans, m, n ) \
\
( bli_is_strictly_above_diag( diagoff, trans, m, n ) || \
bli_is_strictly_below_diag( diagoff, trans, m, n ) )
#define bli_is_stored_subpart( diagoff, trans, uplo, m, n ) \
\
( ( bli_is_upper( uplo ) && bli_is_strictly_above_diag( diagoff, trans, m, n ) ) || \
( bli_is_lower( uplo ) && bli_is_strictly_below_diag( diagoff, trans, m, n ) ) )
#define bli_is_unstored_subpart( diagoff, trans, uplo, m, n ) \
\
( ( bli_is_upper( uplo ) && bli_is_strictly_below_diag( diagoff, trans, m, n ) ) || \
( bli_is_lower( uplo ) && bli_is_strictly_above_diag( diagoff, trans, m, n ) ) )
#define bli_is_strictly_above_diag_n( diagoff, m, n ) \
\
( ( doff_t )m <= -diagoff ) \
#define bli_is_strictly_below_diag_n( diagoff, m, n ) \
\
( ( doff_t )n <= diagoff ) \
#define bli_intersects_diag_n( diagoff, m, n ) \
\
( !bli_is_strictly_above_diag_n( diagoff, m, n ) && \
!bli_is_strictly_below_diag_n( diagoff, m, n ) )
#define bli_is_stored_subpart_n( diagoff, uplo, m, n ) \
\
( ( bli_is_upper( uplo ) && bli_is_strictly_above_diag_n( diagoff, m, n ) ) || \
( bli_is_lower( uplo ) && bli_is_strictly_below_diag_n( diagoff, m, n ) ) )
#define bli_is_unstored_subpart_n( diagoff, uplo, m, n ) \
\
( ( bli_is_upper( uplo ) && bli_is_strictly_below_diag_n( diagoff, m, n ) ) || \
( bli_is_lower( uplo ) && bli_is_strictly_above_diag_n( diagoff, m, n ) ) )
// index-related
#define bli_is_edge_f( i1, iter, left ) \
\
( i1 == iter - 1 && left != 0 )
#define bli_is_not_edge_f( i1, iter, left ) \
\
( i1 != iter - 1 || left == 0 )
#define bli_is_edge_b( i1, iter, left ) \
\
( i1 == 0 && left != 0 )
#define bli_is_not_edge_b( i1, iter, left ) \
\
( i1 != 0 || left == 0 )
// packbuf_t-related
#define bli_packbuf_index( buf_type ) \
\
( ( (buf_type) & BLIS_PACK_BUFFER_BITS ) >> BLIS_PACK_BUFFER_SHIFT )
// return value for char
// return datatype for char
#define bli_stype ( BLIS_FLOAT )
#define bli_dtype ( BLIS_DOUBLE )
#define bli_ctype ( BLIS_SCOMPLEX )
#define bli_ztype ( BLIS_DCOMPLEX )
// return datatype "union" for char pair
#define bli_sstypeunion() ( BLIS_FLOAT )
#define bli_sdtypeunion() ( BLIS_DOUBLE )
#define bli_sctypeunion() ( BLIS_SCOMPLEX )
#define bli_sztypeunion() ( BLIS_DCOMPLEX )
#define bli_dstypeunion() ( BLIS_DOUBLE )
#define bli_ddtypeunion() ( BLIS_DOUBLE )
#define bli_dctypeunion() ( BLIS_DCOMPLEX )
#define bli_dztypeunion() ( BLIS_DCOMPLEX )
#define bli_cstypeunion() ( BLIS_SCOMPLEX )
#define bli_cdtypeunion() ( BLIS_DCOMPLEX )
#define bli_cctypeunion() ( BLIS_SCOMPLEX )
#define bli_cztypeunion() ( BLIS_DCOMPLEX )
#define bli_zstypeunion() ( BLIS_DCOMPLEX )
#define bli_zdtypeunion() ( BLIS_DCOMPLEX )
#define bli_zctypeunion() ( BLIS_DCOMPLEX )
#define bli_zztypeunion() ( BLIS_DCOMPLEX )
// return default format specifier for char
#define bli_sformatspec() "%9.2e"
#define bli_dformatspec() "%9.2e"
#define bli_cformatspec() "%9.2e + %9.2e "
#define bli_zformatspec() "%9.2e + %9.2e "
#define bli_iformatspec() "%6d"
// set scalar datatype and buffer
#define bli_set_scalar_dt_buffer( obj_scalar, dt_aux, dt_scalar, buf_scalar ) \
{ \
if ( bli_obj_is_const( *(obj_scalar) ) ) \
{ \
dt_scalar = dt_aux; \
buf_scalar = bli_obj_scalar_buffer( dt_scalar, *(obj_scalar) ); \
} \
else \
{ \
dt_scalar = bli_obj_datatype( *(obj_scalar) ); \
buf_scalar = bli_obj_buffer_at_off( *(obj_scalar) ); \
} \
}
// set constant datatype and buffer
#define bli_set_const_dt_buffer( obj_scalar, dt_aux, dt_scalar, buf_scalar ) \
{ \
{ \
dt_scalar = dt_aux; \
buf_scalar = bli_obj_scalar_buffer( dt_scalar, *(obj_scalar) ); \
} \
}
// Set dimensions, increments, effective uplo/diagoff, etc for ONE matrix
// argument.
#define bli_set_dims_incs_uplo_1m( diagoffa, diaga, \
uploa, m, n, rs_a, cs_a, \
uplo_eff, n_elem_max, n_iter, inca, lda, \
ij0, n_shift ) \
{ \
/* If matrix A is entirely "unstored", that is, if either:
- A is lower-stored and entirely above the diagonal, or
- A is upper-stored and entirely below the diagonal
then we mark the storage as implicitly zero. */ \
if ( bli_is_unstored_subpart( diagoffa, BLIS_NO_TRANSPOSE, uploa, m, n ) ) \
{ \
uplo_eff = BLIS_ZEROS; \
} \
else \
{ \
doff_t diagoffa_use = diagoffa; \
doff_t diagoff_eff; \
dim_t n_iter_max; \
\
if ( bli_is_unit_diag( diaga ) ) \
bli_shift_diag_offset_to_shrink_uplo( uploa, diagoffa_use ); \
\
/* If matrix A is entirely "stored", that is, if either:
- A is upper-stored and entirely above the diagonal, or
- A is lower-stored and entirely below the diagonal
then we mark the storage as dense. */ \
if ( bli_is_stored_subpart( diagoffa_use, BLIS_NO_TRANSPOSE, uploa, m, n ) ) \
uploa = BLIS_DENSE; \
\
n_iter_max = n; \
n_elem_max = m; \
inca = rs_a; \
lda = cs_a; \
uplo_eff = uploa; \
diagoff_eff = diagoffa_use; \
\
if ( bli_is_row_tilted( inca, lda ) ) \
{ \
bli_swap_dims( n_iter_max, n_elem_max ); \
bli_swap_incs( inca, lda ); \
bli_toggle_uplo( uplo_eff ); \
bli_negate_diag_offset( diagoff_eff ); \
} \
\
if ( bli_is_dense( uplo_eff ) ) \
{ \
n_iter = n_iter_max; \
} \
else if ( bli_is_upper( uplo_eff ) ) \
{ \
if ( diagoff_eff < 0 ) \
{ \
ij0 = 0; \
n_shift = -diagoff_eff; \
n_elem_max = bli_min( n_elem_max, n_shift + bli_min( m, n ) ); \
n_iter = n_iter_max; \
} \
else \
{ \
ij0 = diagoff_eff; \
n_shift = 0; \
n_iter = n_iter_max - diagoff_eff; \
} \
} \
else /* if ( bli_is_lower( uplo_eff ) ) */ \
{ \
if ( diagoff_eff < 0 ) \
{ \
ij0 = -diagoff_eff; \
n_shift = 0; \
n_elem_max = n_elem_max + diagoff_eff; \
n_iter = bli_min( n_elem_max, bli_min( m, n ) ); \
} \
else \
{ \
ij0 = 0; \
n_shift = diagoff_eff; \
n_iter = bli_min( n_iter_max, n_shift + bli_min( m, n ) ); \
} \
} \
} \
}
// Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix
// arguments.
#define bli_set_dims_incs_uplo_2m( \
diagoffa, diaga, transa, \
uploa, m, n, rs_a, cs_a, rs_b, cs_b, \
uplo_eff, n_elem_max, n_iter, inca, lda, incb, ldb, \
ij0, n_shift \
) \
{ \
/* If matrix A is entirely "unstored", that is, if either:
- A is lower-stored and entirely above the diagonal, or
- A is upper-stored and entirely below the diagonal
then we mark the storage as implicitly zero. */ \
if ( bli_is_unstored_subpart( diagoffa, transa, uploa, m, n ) ) \
{ \
uplo_eff = BLIS_ZEROS; \
} \
else \
{ \
doff_t diagoffa_use = diagoffa; \
doff_t diagoff_eff; \
dim_t n_iter_max; \
\
if ( bli_is_unit_diag( diaga ) ) \
bli_shift_diag_offset_to_shrink_uplo( uploa, diagoffa_use ); \
\
/* If matrix A is entirely "stored", that is, if either:
- A is upper-stored and entirely above the diagonal, or
- A is lower-stored and entirely below the diagonal
then we mark the storage as dense. */ \
if ( bli_is_stored_subpart( diagoffa_use, transa, uploa, m, n ) ) \
uploa = BLIS_DENSE; \
\
n_iter_max = n; \
n_elem_max = m; \
inca = rs_a; \
lda = cs_a; \
incb = rs_b; \
ldb = cs_b; \
uplo_eff = uploa; \
diagoff_eff = diagoffa_use; \
\
if ( bli_does_trans( transa ) ) \
{ \
bli_swap_incs( inca, lda ); \
bli_toggle_uplo( uplo_eff ); \
bli_negate_diag_offset( diagoff_eff ); \
} \
\
if ( bli_is_row_tilted( incb, ldb ) && \
bli_is_row_tilted( inca, lda ) ) \
{ \
bli_swap_dims( n_iter_max, n_elem_max ); \
bli_swap_incs( inca, lda ); \
bli_swap_incs( incb, ldb ); \
bli_toggle_uplo( uplo_eff ); \
bli_negate_diag_offset( diagoff_eff ); \
} \
\
if ( bli_is_dense( uplo_eff ) ) \
{ \
n_iter = n_iter_max; \
} \
else if ( bli_is_upper( uplo_eff ) ) \
{ \
if ( diagoff_eff < 0 ) \
{ \
/*printf( "uplo_eff = upper, diagoff_eff < 0\n" );*/ \
ij0 = 0; \
n_shift = -diagoff_eff; \
n_elem_max = bli_min( n_elem_max, n_shift + bli_min( m, n ) ); \
n_iter = n_iter_max; \
} \
else \
{ \
/*printf( "uplo_eff = upper, diagoff_eff >= 0\n" );*/ \
ij0 = diagoff_eff; \
n_shift = 0; \
n_iter = n_iter_max - diagoff_eff; \
} \
} \
else /* if ( bli_is_lower( uplo_eff ) ) */ \
{ \
if ( diagoff_eff < 0 ) \
{ \
/*printf( "uplo_eff = lower, diagoff_eff < 0\n" );*/ \
ij0 = -diagoff_eff; \
n_shift = 0; \
n_elem_max = n_elem_max + diagoff_eff; \
n_iter = bli_min( n_elem_max, bli_min( m, n ) ); \
} \
else \
{ \
/*printf( "uplo_eff = lower, diagoff_eff >= 0\n" );*/ \
ij0 = 0; \
n_shift = diagoff_eff; \
n_iter = bli_min( n_iter_max, n_shift + bli_min( m, n ) ); \
} \
} \
} \
}
// Set dimensions, increments, etc for ONE matrix argument when operating
// on the diagonal.
#define bli_set_dims_incs_1d( diagoffx, \
m, n, rs_x, cs_x, \
offx, n_elem, incx ) \
{ \
if ( diagoffx < 0 ) \
{ \
n_elem = bli_min( m - ( dim_t )(-diagoffx), n ); \
offx = ( dim_t )(-diagoffx) * rs_x; \
} \
else \
{ \
n_elem = bli_min( n - ( dim_t )( diagoffx), m ); \
offx = ( dim_t )( diagoffx) * cs_x; \
} \
\
incx = rs_x + cs_x; \
}
// Set dimensions, increments, etc for TWO matrix arguments when operating
// on diagonals.
#define bli_set_dims_incs_2d( diagoffx, transx, \
m, n, rs_x, cs_x, rs_y, cs_y, \
offx, offy, n_elem, incx, incy ) \
{ \
doff_t diagoffy = bli_diag_offset_with_trans( transx, diagoffx ); \
\
if ( diagoffx < 0 ) offx = -diagoffx * rs_x; \
else offx = diagoffx * cs_x; \
\
if ( diagoffy < 0 ) \
{ \
n_elem = bli_min( m - ( dim_t )(-diagoffy), n ); \
offy = -diagoffy * rs_y; \
} \
else \
{ \
n_elem = bli_min( n - ( dim_t )( diagoffy), m ); \
offy = diagoffy * cs_y; \
} \
\
incx = rs_x + cs_x; \
incy = rs_y + cs_y; \
}
#endif