Implemented castm, castv operations.

Details:
- Implemented castm and castv operations, which behave like copym and
  copyv except that the obj_t operands may be of different datatypes.
  These new operations, however, unlike copym/copyv, do not build upon
  existing level-1v kernels.
- Reorganized projm, projv into a 'proj' subdirectory of frame/base (to
  match the newly added frame/base/cast directory).
- Added new macros to bli_gentfunc_macro_defs.h, _gentprot_macro_defs.h
  that insert GENTFUNC2/GENTPROT2 macros for all non-homogeneous datatype
  combinations. Previously, one had to invoke two additional macros--one
  which mixed domains only and another that included all remaining
  cases--in order to get full type combination coverage.
- Defined a new static function, bli_set_dims_incs_2m(), to aid in the
  setting of various variables in the implementations of bli_??castm().
  This static function joins others like it in bli_param_macro_defs.h.
- Comment update to bli_copysc.h.
This commit is contained in:
Field G. Van Zee
2018-06-18 15:56:26 -05:00
parent ed20392c50
commit e88a5b8da8
17 changed files with 1122 additions and 44 deletions

View File

@@ -402,6 +402,51 @@ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) \
// -- Mixed domain/precision (all) two-operand macro --
// -- (no auxiliary arguments) --
// Expands to twelve GENTFUNC2 invocations, one for every ordered pair of
// DISTINCT datatypes drawn from float (s), double (d), scomplex (c), and
// dcomplex (z). The four homogeneous pairs (s/s, d/d, c/c, z/z) are
// deliberately absent. Each invocation receives the two C type names, the
// two corresponding type characters, and tfuncname, in that order.
// NOTE(review): GENTFUNC2 is not defined in this hunk; presumably the
// invoking source file defines it with a matching five-parameter signature
// before using this macro -- confirm against callers.
#define INSERT_GENTFUNC2_MIXDP0( tfuncname ) \
\
GENTFUNC2( float, double, s, d, tfuncname ) \
GENTFUNC2( float, scomplex, s, c, tfuncname ) \
GENTFUNC2( float, dcomplex, s, z, tfuncname ) \
\
GENTFUNC2( double, float, d, s, tfuncname ) \
GENTFUNC2( double, scomplex, d, c, tfuncname ) \
GENTFUNC2( double, dcomplex, d, z, tfuncname ) \
\
GENTFUNC2( scomplex, float, c, s, tfuncname ) \
GENTFUNC2( scomplex, double, c, d, tfuncname ) \
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \
\
GENTFUNC2( dcomplex, float, z, s, tfuncname ) \
GENTFUNC2( dcomplex, double, z, d, tfuncname ) \
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname )
// -- (one auxiliary argument) --
// Expands to twelve GENTFUNC2 invocations, one for every ordered pair of
// DISTINCT datatypes drawn from float (s), double (d), scomplex (c), and
// dcomplex (z); homogeneous pairs are not generated. Each invocation
// receives the two C type names, the two type characters, tfuncname, and
// the auxiliary argument varname, in that order.
// NOTE(review): GENTFUNC2 is not defined in this hunk; presumably the
// invoking source file defines it with a matching six-parameter signature
// before using this macro -- confirm against callers.
#define INSERT_GENTFUNC2_MIX_DP( tfuncname, varname ) \
\
GENTFUNC2( float, double, s, d, tfuncname, varname ) \
GENTFUNC2( float, scomplex, s, c, tfuncname, varname ) \
GENTFUNC2( float, dcomplex, s, z, tfuncname, varname ) \
\
GENTFUNC2( double, float, d, s, tfuncname, varname ) \
GENTFUNC2( double, scomplex, d, c, tfuncname, varname ) \
GENTFUNC2( double, dcomplex, d, z, tfuncname, varname ) \
\
GENTFUNC2( scomplex, float, c, s, tfuncname, varname ) \
GENTFUNC2( scomplex, double, c, d, tfuncname, varname ) \
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname, varname ) \
\
GENTFUNC2( dcomplex, float, z, s, tfuncname, varname ) \
GENTFUNC2( dcomplex, double, z, d, tfuncname, varname ) \
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname )
// -- Basic two-operand with real projection of first operand --
// -- (no auxiliary arguments) --

View File

@@ -395,6 +395,50 @@ GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) \
// -- Mixed domain/precision (all) two-operand macro --
// -- (no auxiliary arguments) --
// Expands to twelve GENTPROT2 invocations, one for every ordered pair of
// DISTINCT datatypes drawn from float (s), double (d), scomplex (c), and
// dcomplex (z). The four homogeneous pairs (s/s, d/d, c/c, z/z) are
// deliberately absent. Each invocation receives the two C type names, the
// two corresponding type characters, and funcname, in that order.
// NOTE(review): GENTPROT2 is not defined in this hunk; presumably the
// invoking header defines it with a matching five-parameter signature
// before using this macro -- confirm against callers.
#define INSERT_GENTPROT2_MIXDP0( funcname ) \
\
GENTPROT2( float, double, s, d, funcname ) \
GENTPROT2( float, scomplex, s, c, funcname ) \
GENTPROT2( float, dcomplex, s, z, funcname ) \
\
GENTPROT2( double, float, d, s, funcname ) \
GENTPROT2( double, scomplex, d, c, funcname ) \
GENTPROT2( double, dcomplex, d, z, funcname ) \
\
GENTPROT2( scomplex, float, c, s, funcname ) \
GENTPROT2( scomplex, double, c, d, funcname ) \
GENTPROT2( scomplex, dcomplex, c, z, funcname ) \
\
GENTPROT2( dcomplex, float, z, s, funcname ) \
GENTPROT2( dcomplex, double, z, d, funcname ) \
GENTPROT2( dcomplex, scomplex, z, c, funcname )
// -- (one auxiliary argument) --
// Expands to twelve GENTPROT2 invocations, one for every ordered pair of
// DISTINCT datatypes drawn from float (s), double (d), scomplex (c), and
// dcomplex (z); homogeneous pairs are not generated. Each invocation
// receives the two C type names, the two type characters, tfuncname, and
// the auxiliary argument varname, in that order.
// NOTE(review): GENTPROT2 is not defined in this hunk; presumably the
// invoking header defines it with a matching six-parameter signature
// before using this macro -- confirm against callers.
#define INSERT_GENTPROT2_MIX_DP( tfuncname, varname ) \
\
GENTPROT2( float, double, s, d, tfuncname, varname ) \
GENTPROT2( float, scomplex, s, c, tfuncname, varname ) \
GENTPROT2( float, dcomplex, s, z, tfuncname, varname ) \
\
GENTPROT2( double, float, d, s, tfuncname, varname ) \
GENTPROT2( double, scomplex, d, c, tfuncname, varname ) \
GENTPROT2( double, dcomplex, d, z, tfuncname, varname ) \
\
GENTPROT2( scomplex, float, c, s, tfuncname, varname ) \
GENTPROT2( scomplex, double, c, d, tfuncname, varname ) \
GENTPROT2( scomplex, dcomplex, c, z, tfuncname, varname ) \
\
GENTPROT2( dcomplex, float, z, s, tfuncname, varname ) \
GENTPROT2( dcomplex, double, z, d, tfuncname, varname ) \
GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname )
// -- Basic two-operand with real projection of first operand --
// -- (no auxiliary arguments) --

View File

@@ -990,6 +990,41 @@ void bli_set_dims_incs_uplo_1m_noswap
}
}
// Set dimensions and increments for TWO matrix arguments.
//
// Inputs:
//   transa     - trans value associated with A; only consulted through
//                bli_does_trans().
//   m, n       - dimensions; they seed *n_elem and *n_iter respectively.
//   rs_a, cs_a - the two increments of A; they seed *inca and *lda.
//   rs_b, cs_b - the two increments of B; they seed *incb and *ldb.
//
// Outputs (all written unconditionally):
//   n_elem, n_iter - the pair (m, n), exchanged if both operands are
//                    reported as row-tilted.
//   inca, lda      - A's increments after the optional transposition swap
//                    and the optional row-tilt swap.
//   incb, ldb      - B's increments after the optional row-tilt swap.
//
// NOTE(review): the exact meaning of "row-tilted" is decided by
// bli_is_row_tilted(), which is not visible here; presumably it indicates
// that traversing along rows is the better-strided direction -- confirm.
static
void bli_set_dims_incs_2m
     (
       trans_t transa,
       dim_t  m,      dim_t  n,      inc_t  rs_a, inc_t  cs_a,
                                     inc_t  rs_b, inc_t  cs_b,
       dim_t* n_elem, dim_t* n_iter, inc_t* inca, inc_t* lda,
                                     inc_t* incb, inc_t* ldb
     )
{
	// Start from the untransposed, column-wise interpretation of both
	// operands.
	*n_elem = m;
	*n_iter = n;

	*inca = rs_a;
	*lda  = cs_a;

	*incb = rs_b;
	*ldb  = cs_b;

	// A requested transposition of A is realized by exchanging A's two
	// increments; B is left alone.
	if ( bli_does_trans( transa ) )
		bli_swap_incs( inca, lda );

	// The dims/increments are flipped only when BOTH operands are
	// row-tilted. B is tested first and A is tested only if B passed,
	// mirroring a short-circuit "B && A" evaluation.
	if ( !bli_is_row_tilted( *n_elem, *n_iter, *incb, *ldb ) )
		return;

	if ( !bli_is_row_tilted( *n_elem, *n_iter, *inca, *lda ) )
		return;

	// Both operands are row-tilted: exchange the roles of the two
	// dimensions and of each operand's increments.
	bli_swap_dims( n_iter, n_elem );
	bli_swap_incs( inca, lda );
	bli_swap_incs( incb, ldb );
}
// Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix
// arguments.
@@ -1033,7 +1068,7 @@ void bli_set_dims_incs_uplo_2m
if ( bli_is_stored_subpart( diagoffa_use_, transa, uploa, m, n ) )
uploa = BLIS_DENSE;
n_iter_max_ = n;
n_iter_max_ = n;
*n_elem_max = m;
*inca = rs_a;
*lda = cs_a;

View File

@@ -122,9 +122,13 @@ extern "C" {
#include "bli_cpuid.h"
#include "bli_string.h"
#include "bli_setgetij.h"
#include "bli_proj.h"
#include "bli_setri.h"
#include "bli_castm.h"
#include "bli_castv.h"
#include "bli_projm.h"
#include "bli_projv.h"
// -- Level-0 operations --