mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Added extensive support for configuration defaults.
Details:
- Standard names for reference kernels (levels-1v, -1f and 3) are now
macro constants. Examples:
BLIS_SAXPYV_KERNEL_REF
BLIS_DDOTXF_KERNEL_REF
BLIS_ZGEMM_UKERNEL_REF
- Developers no longer have to name all datatype instances of a kernel
with a common base name; [sdcz] datatype flavors of each kernel or
micro-kernel (level-1v, -1f, or 3) may now be named independently.
This means you can now, if you wish, encode the datatype-specific
register blocksizes in the name of the micro-kernel functions.
- Any datatype instance of any kernel (1v, 1f, or 3) that is left
undefined in bli_kernel.h will default to the corresponding reference
implementation. For example, if BLIS_DGEMM_UKERNEL is left undefined,
it will be defined to be BLIS_DGEMM_UKERNEL_REF.
- Developers no longer need to name level-1v/-1f kernels with multiple
datatype chars to match the number of types the kernel WOULD take in
a mixed type environment, as in bli_dddaxpyv_opt(). Now, one char is
sufficient, as in bli_daxpyv_opt().
- There is no longer a need to define an obj_t wrapper to go along with
your level-1v/-1f kernels. The framework now provides a _kernel()
function which serves as the obj_t wrapper for whatever kernels are
specified (or defaulted to) via bli_kernel.h
- Developers no longer need to prototype their kernels, and thus no
longer need to include any prototyping headers from within
bli_kernel.h. The framework now generates kernel prototypes, with the
proper type signature, based on the kernel names defined (or defaulted
to) via bli_kernel.h.
- If the complex datatype x (of [cz]) implementation of the gemm micro-
kernel is left undefined by bli_kernel.h, but its same-precision real
domain equivalent IS defined, BLIS will use a 4m-based implementation
for the datatype x implementations of all level-3 operations, using
only the real gemm micro-kernel.
This commit is contained in:
@@ -35,7 +35,6 @@
|
||||
#include "blis.h"
|
||||
|
||||
|
||||
/*
|
||||
//
|
||||
// Define object-based interface.
|
||||
//
|
||||
@@ -60,8 +59,7 @@ void PASTEMAC0(opname)( \
|
||||
z ); \
|
||||
}
|
||||
|
||||
GENFRONT( axpy2v, AXPY2V_KERNEL )
|
||||
*/
|
||||
GENFRONT( axpy2v, axpy2v_kernel )
|
||||
|
||||
|
||||
//
|
||||
@@ -123,7 +121,6 @@ void PASTEMAC3(chx,chy,chz,opname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
//INSERT_GENTFUNC3U12_BASIC( axpy2v, axpy2v_unb_var1 )
|
||||
INSERT_GENTFUNC3U12_BASIC( axpy2v, AXPY2V_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
|
||||
@@ -33,7 +33,8 @@
|
||||
*/
|
||||
|
||||
#include "bli_axpy2v_check.h"
|
||||
#include "bli_axpy2v_unb_var1.h"
|
||||
#include "bli_axpy2v_kernel.h"
|
||||
#include "bli_axpy2v_ref.h"
|
||||
|
||||
|
||||
//
|
||||
|
||||
150
frame/1f/axpy2v/bli_axpy2v_kernel.c
Normal file
150
frame/1f/axpy2v/bli_axpy2v_kernel.c
Normal file
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#define FUNCPTR_T axpy2v_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
conj_t conjx,
|
||||
conj_t conjy,
|
||||
dim_t n,
|
||||
void* alpha1,
|
||||
void* alpha2,
|
||||
void* x, inc_t incx,
|
||||
void* y, inc_t incy,
|
||||
void* z, inc_t incz
|
||||
);
|
||||
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,axpy2v_kernel_void);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,axpy2v_kernel_void);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,axpy2v_kernel_void);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
// Object-based front end for the axpy2v kernel. Unpacks the obj_t operands
// into conjugation flags, a vector length, raw buffers and increments, looks
// up the matching void*-based wrapper in ftypes, and invokes it.
//
//   alpha1, alpha2 - scalar objects
//   x, y           - input vector objects (conjugation status is read from each)
//   z              - vector object whose buffer and increment are passed last
void bli_axpy2v_kernel( obj_t*  alpha1,
                        obj_t*  alpha2,
                        obj_t*  x,
                        obj_t*  y,
                        obj_t*  z )
{
    num_t     dt_x      = bli_obj_datatype( *x );
    num_t     dt_y      = bli_obj_datatype( *y );
    num_t     dt_z      = bli_obj_datatype( *z );

    conj_t    conjx     = bli_obj_conj_status( *x );
    conj_t    conjy     = bli_obj_conj_status( *y );
    dim_t     n         = bli_obj_vector_dim( *x );

    inc_t     inc_x     = bli_obj_vector_inc( *x );
    void*     buf_x     = bli_obj_buffer_at_off( *x );

    inc_t     inc_y     = bli_obj_vector_inc( *y );
    void*     buf_y     = bli_obj_buffer_at_off( *y );

    inc_t     inc_z     = bli_obj_vector_inc( *z );
    void*     buf_z     = bli_obj_buffer_at_off( *z );

    num_t     dt_alpha1;
    void*     buf_alpha1;

    num_t     dt_alpha2;
    void*     buf_alpha2;

    FUNCPTR_T f;

    // If alpha is a scalar constant, use dt_x to extract the address of the
    // corresponding constant value; otherwise, use the datatype encoded
    // within the alpha object and extract the buffer at the alpha offset.
    bli_set_scalar_dt_buffer( alpha1, dt_x, dt_alpha1, buf_alpha1 );
    bli_set_scalar_dt_buffer( alpha2, dt_x, dt_alpha2, buf_alpha2 );

    // Index into the type combination array to extract the correct
    // function pointer. The wrappers are generated per (x,y,z) type
    // combination, so the table must be indexed by the datatypes of x, y
    // and z -- not by the datatype of alpha1.
    f = ftypes[dt_x][dt_y][dt_z];

    // Invoke the function.
    f( conjx,
       conjy,
       n,
       buf_alpha1,
       buf_alpha2,
       buf_x, inc_x,
       buf_y, inc_y,
       buf_z, inc_z );
}
|
||||
|
||||
|
||||
// Template for the void*-based axpy2v wrappers: for each (chx,chy,chz)
// combination it defines PASTEMAC3(chx,chy,chz,varname), which forwards all
// of its arguments unchanged to PASTEMAC3(chx,chy,chz,kername).
#undef  GENTFUNC3U12
#define GENTFUNC3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, varname, kername ) \
\
void PASTEMAC3(chx,chy,chz,varname) \
     ( \
       conj_t conjx, \
       conj_t conjy, \
       dim_t  n, \
       void*  alpha1, \
       void*  alpha2, \
       void*  x, inc_t incx, \
       void*  y, inc_t incy, \
       void*  z, inc_t incz \
     ) \
{ \
    PASTEMAC3(chx,chy,chz,kername) \
    ( \
      conjx, \
      conjy, \
      n, \
      alpha1, \
      alpha2, \
      x, incx, \
      y, incy, \
      z, incz \
    ); \
}
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( axpy2v_kernel_void, AXPY2V_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( axpy2v_kernel_void, AXPY2V_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( axpy2v_kernel_void, AXPY2V_KERNEL )
|
||||
#endif
|
||||
|
||||
@@ -32,13 +32,17 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_axpy2v_unb_var1( obj_t* alpha1,
|
||||
obj_t* alpha2,
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
obj_t* z );
|
||||
void bli_axpy2v_kernel( obj_t* alpha1,
|
||||
obj_t* alpha2,
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
obj_t* z );
|
||||
|
||||
|
||||
//
|
||||
// Prototype the void pointer kernel wrappers.
|
||||
//
|
||||
|
||||
#undef GENTPROT3
|
||||
#define GENTPROT3( ctype_x, ctype_y, ctype_z, chx, chy, chz, varname ) \
|
||||
\
|
||||
@@ -53,13 +57,13 @@ void PASTEMAC3(chx,chy,chz,varname)( \
|
||||
void* z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3_BASIC( axpy2v_unb_var1 )
|
||||
INSERT_GENTPROT3_BASIC( axpy2v_kernel_void )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3_MIX_D( axpy2v_unb_var1 )
|
||||
INSERT_GENTPROT3_MIX_D( axpy2v_kernel_void )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3_MIX_P( axpy2v_unb_var1 )
|
||||
INSERT_GENTPROT3_MIX_P( axpy2v_kernel_void )
|
||||
#endif
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
/*
|
||||
#define FUNCPTR_T axpy2v_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
@@ -50,17 +51,17 @@ typedef void (*FUNCPTR_T)(
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,axpy2v_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,axpy2v_ref);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,axpy2v_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,axpy2v_ref);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,axpy2v_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,axpy2v_ref);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
void bli_axpy2v_unb_var1( obj_t* alpha1,
|
||||
void bli_axpy2v_ref( obj_t* alpha1,
|
||||
obj_t* alpha2,
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
@@ -110,21 +111,23 @@ void bli_axpy2v_unb_var1( obj_t* alpha1,
|
||||
buf_y, inc_y,
|
||||
buf_z, inc_z );
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
#undef GENTFUNC3U12
|
||||
#define GENTFUNC3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, varname, kername ) \
|
||||
\
|
||||
void PASTEMAC3(chx,chy,chz,varname)( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
void* alpha1, \
|
||||
void* alpha2, \
|
||||
void* x, inc_t incx, \
|
||||
void* y, inc_t incy, \
|
||||
void* z, inc_t incz \
|
||||
) \
|
||||
void PASTEMAC3(chx,chy,chz,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype_xy* restrict alpha1, \
|
||||
ctype_xy* restrict alpha2, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy, \
|
||||
ctype_z* restrict z, inc_t incz \
|
||||
) \
|
||||
{ \
|
||||
ctype_xy* alpha1_cast = alpha1; \
|
||||
ctype_xy* alpha2_cast = alpha2; \
|
||||
@@ -146,13 +149,13 @@ void PASTEMAC3(chx,chy,chz,varname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( axpy2v_unb_var1, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_BASIC( axpy2v_ref, AXPYV_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( axpy2v_unb_var1, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_D( axpy2v_ref, AXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( axpy2v_unb_var1, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_P( axpy2v_ref, AXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
68
frame/1f/axpy2v/bli_axpy2v_ref.h
Normal file
68
frame/1f/axpy2v/bli_axpy2v_ref.h
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
void bli_axpy2v_ref( obj_t* alpha1,
|
||||
obj_t* alpha2,
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
obj_t* z );
|
||||
*/
|
||||
|
||||
|
||||
// Prototype template for the typed axpy2v reference kernels: alpha1 and
// alpha2 use ctype_xy, while x, y and z use their own ctypes.
#undef  GENTPROT3U12
#define GENTPROT3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, varname ) \
\
void PASTEMAC3(chx,chy,chz,varname)( \
       conj_t             conjx, \
       conj_t             conjy, \
       dim_t              n, \
       ctype_xy* restrict alpha1, \
       ctype_xy* restrict alpha2, \
       ctype_x*  restrict x, inc_t incx, \
       ctype_y*  restrict y, inc_t incy, \
       ctype_z*  restrict z, inc_t incz \
     );
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( axpy2v_ref )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( axpy2v_ref )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( axpy2v_ref )
|
||||
#endif
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
#include "blis.h"
|
||||
|
||||
|
||||
/*
|
||||
//
|
||||
// Define object-based interface.
|
||||
//
|
||||
@@ -68,8 +67,7 @@ void PASTEMAC0(opname)( \
|
||||
y ); \
|
||||
}
|
||||
|
||||
GENFRONT( axpyf, AXPYF_KERNEL )
|
||||
*/
|
||||
GENFRONT( axpyf, axpyf_kernel )
|
||||
|
||||
|
||||
//
|
||||
|
||||
@@ -34,7 +34,8 @@
|
||||
|
||||
#include "bli_axpyf_check.h"
|
||||
#include "bli_axpyf_fusefac.h"
|
||||
#include "bli_axpyf_unb_var1.h"
|
||||
#include "bli_axpyf_kernel.h"
|
||||
#include "bli_axpyf_ref.h"
|
||||
|
||||
|
||||
//
|
||||
|
||||
149
frame/1f/axpyf/bli_axpyf_kernel.c
Normal file
149
frame/1f/axpyf/bli_axpyf_kernel.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#define FUNCPTR_T axpyf_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
conj_t conja,
|
||||
conj_t conjx,
|
||||
dim_t m,
|
||||
dim_t b_n,
|
||||
void* alpha,
|
||||
void* a, inc_t inca, inc_t lda,
|
||||
void* x, inc_t incx,
|
||||
void* y, inc_t incy
|
||||
);
|
||||
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,axpyf_kernel_void);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,axpyf_kernel_void);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,axpyf_kernel_void);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
// Object-based front end for the axpyf kernel. Extracts datatypes,
// conjugation flags, dimensions, buffers and strides from the operand
// objects, then dispatches through ftypes to the void*-based wrapper.
void bli_axpyf_kernel( obj_t*  alpha,
                       obj_t*  a,
                       obj_t*  x,
                       obj_t*  y )
{
    // Operand datatypes; together they select the wrapper.
    num_t     type_a    = bli_obj_datatype( *a );
    num_t     type_x    = bli_obj_datatype( *x );
    num_t     type_y    = bli_obj_datatype( *y );

    // Matrix a: conjugation, buffer and both strides.
    conj_t    conj_a    = bli_obj_conj_status( *a );
    void*     a_buf     = bli_obj_buffer_at_off( *a );
    inc_t     a_rs      = bli_obj_row_stride( *a );
    inc_t     a_cs      = bli_obj_col_stride( *a );

    // Vector x: conjugation, length (passed as b_n), buffer and increment.
    conj_t    conj_x    = bli_obj_conj_status( *x );
    dim_t     len_x     = bli_obj_vector_dim( *x );
    void*     x_buf     = bli_obj_buffer_at_off( *x );
    inc_t     x_inc     = bli_obj_vector_inc( *x );

    // Vector y: length (passed as m), buffer and increment.
    dim_t     len_y     = bli_obj_vector_dim( *y );
    void*     y_buf     = bli_obj_buffer_at_off( *y );
    inc_t     y_inc     = bli_obj_vector_inc( *y );

    // The datatype of alpha MUST be the type union of a and x. This is to
    // prevent any unnecessary loss of information during computation.
    num_t     type_alpha = bli_datatype_union( type_a, type_x );
    void*     alpha_buf  = bli_obj_buffer_for_1x1( type_alpha, *alpha );

    // Look up the wrapper for this (a,x,y) type combination.
    FUNCPTR_T kernel_fp = ftypes[type_a][type_x][type_y];

    // Call it.
    kernel_fp( conj_a,
               conj_x,
               len_y,
               len_x,
               alpha_buf,
               a_buf, a_rs, a_cs,
               x_buf, x_inc,
               y_buf, y_inc );
}
|
||||
|
||||
|
||||
// Template for the void*-based axpyf wrappers: for each (cha,chx,chy)
// combination it defines PASTEMAC3(cha,chx,chy,varname), which forwards all
// of its arguments unchanged to PASTEMAC3(cha,chx,chy,kername).
#undef  GENTFUNC3U12
#define GENTFUNC3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname, kername ) \
\
void PASTEMAC3(cha,chx,chy,varname) \
     ( \
       conj_t conja, \
       conj_t conjx, \
       dim_t  m, \
       dim_t  b_n, \
       void*  alpha, \
       void*  a, inc_t inca, inc_t lda, \
       void*  x, inc_t incx, \
       void*  y, inc_t incy \
     ) \
{ \
    PASTEMAC3(cha,chx,chy,kername) \
    ( \
      conja, \
      conjx, \
      m, \
      b_n, \
      alpha, \
      a, inca, lda, \
      x, incx, \
      y, incy \
    ); \
}
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( axpyf_kernel_void, AXPYF_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( axpyf_kernel_void, AXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( axpyf_kernel_void, AXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
@@ -32,12 +32,16 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_axpyf_unb_var1( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* x,
|
||||
obj_t* y );
|
||||
void bli_axpyf_kernel( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* x,
|
||||
obj_t* y );
|
||||
|
||||
|
||||
//
|
||||
// Prototype the void pointer kernel wrappers.
|
||||
//
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \
|
||||
\
|
||||
@@ -52,13 +56,13 @@ void PASTEMAC3(cha,chx,chy,varname)( \
|
||||
void* y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( axpyf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_BASIC( axpyf_kernel_void )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( axpyf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_MIX_D( axpyf_kernel_void )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( axpyf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_MIX_P( axpyf_kernel_void )
|
||||
#endif
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
/*
|
||||
#define FUNCPTR_T axpyf_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
@@ -50,17 +51,17 @@ typedef void (*FUNCPTR_T)(
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,axpyf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,axpyf_ref);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,axpyf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,axpyf_ref);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,axpyf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,axpyf_ref);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
void bli_axpyf_unb_var1( obj_t* alpha,
|
||||
void bli_axpyf_ref( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* x,
|
||||
obj_t* y )
|
||||
@@ -109,21 +110,23 @@ void bli_axpyf_unb_var1( obj_t* alpha,
|
||||
buf_x, inc_x,
|
||||
buf_y, inc_y );
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
#undef GENTFUNC3U12
|
||||
#define GENTFUNC3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname, kername ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chx,chy,varname)( \
|
||||
conj_t conja, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
void* alpha, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* x, inc_t incx, \
|
||||
void* y, inc_t incy \
|
||||
) \
|
||||
void PASTEMAC3(cha,chx,chy,varname) \
|
||||
( \
|
||||
conj_t conja, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ax* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
) \
|
||||
{ \
|
||||
ctype_ax* alpha_cast = alpha; \
|
||||
ctype_a* a_cast = a; \
|
||||
@@ -154,13 +157,13 @@ void PASTEMAC3(cha,chx,chy,varname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( axpyf_unb_var1, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_BASIC( axpyf_ref, AXPYV_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( axpyf_unb_var1, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_D( axpyf_ref, AXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( axpyf_unb_var1, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_P( axpyf_ref, AXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
67
frame/1f/axpyf/bli_axpyf_ref.h
Normal file
67
frame/1f/axpyf/bli_axpyf_ref.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
void bli_axpyf_ref( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* x,
|
||||
obj_t* y );
|
||||
*/
|
||||
|
||||
|
||||
// Prototype template for the typed axpyf reference kernels: alpha uses
// ctype_ax, while a, x and y use their own ctypes.
#undef  GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \
\
void PASTEMAC3(cha,chx,chy,varname)( \
       conj_t             conja, \
       conj_t             conjx, \
       dim_t              m, \
       dim_t              b_n, \
       ctype_ax* restrict alpha, \
       ctype_a*  restrict a, inc_t inca, inc_t lda, \
       ctype_x*  restrict x, inc_t incx, \
       ctype_y*  restrict y, inc_t incy \
     );
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( axpyf_ref )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( axpyf_ref )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( axpyf_ref )
|
||||
#endif
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
#include "blis.h"
|
||||
|
||||
|
||||
/*
|
||||
//
|
||||
// Define object-based interface.
|
||||
//
|
||||
@@ -62,8 +61,7 @@ void PASTEMAC0(opname)( \
|
||||
z ); \
|
||||
}
|
||||
|
||||
GENFRONT( dotaxpyv, DOTAXPYV_KERNEL )
|
||||
*/
|
||||
GENFRONT( dotaxpyv, dotaxpyv_kernel )
|
||||
|
||||
|
||||
//
|
||||
|
||||
@@ -33,7 +33,8 @@
|
||||
*/
|
||||
|
||||
#include "bli_dotaxpyv_check.h"
|
||||
#include "bli_dotaxpyv_unb_var1.h"
|
||||
#include "bli_dotaxpyv_kernel.h"
|
||||
#include "bli_dotaxpyv_ref.h"
|
||||
|
||||
|
||||
//
|
||||
|
||||
155
frame/1f/dotaxpyv/bli_dotaxpyv_kernel.c
Normal file
155
frame/1f/dotaxpyv/bli_dotaxpyv_kernel.c
Normal file
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#define FUNCPTR_T dotaxpyv_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
conj_t conjxt,
|
||||
conj_t conjx,
|
||||
conj_t conjy,
|
||||
dim_t n,
|
||||
void* alpha,
|
||||
void* x, inc_t incx,
|
||||
void* y, inc_t incy,
|
||||
void* rho,
|
||||
void* z, inc_t incz
|
||||
);
|
||||
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotaxpyv_kernel_void);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotaxpyv_kernel_void);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotaxpyv_kernel_void);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
// Object-based front end for the dotaxpyv kernel. Extracts datatypes,
// conjugation flags, the vector length, buffers and increments from the
// operand objects, then dispatches through ftypes to the void*-based wrapper.
void bli_dotaxpyv_kernel( obj_t*  alpha,
                          obj_t*  xt,
                          obj_t*  x,
                          obj_t*  y,
                          obj_t*  rho,
                          obj_t*  z )
{
    // Datatypes of x, y and z select the wrapper.
    num_t     type_x    = bli_obj_datatype( *x );
    num_t     type_y    = bli_obj_datatype( *y );
    num_t     type_z    = bli_obj_datatype( *z );

    // Only the conjugation status is read from xt.
    conj_t    conj_xt   = bli_obj_conj_status( *xt );

    // Vector x: conjugation, length, buffer and increment.
    conj_t    conj_x    = bli_obj_conj_status( *x );
    dim_t     len       = bli_obj_vector_dim( *x );
    void*     x_buf     = bli_obj_buffer_at_off( *x );
    inc_t     x_inc     = bli_obj_vector_inc( *x );

    // Vector y: conjugation, buffer and increment.
    conj_t    conj_y    = bli_obj_conj_status( *y );
    void*     y_buf     = bli_obj_buffer_at_off( *y );
    inc_t     y_inc     = bli_obj_vector_inc( *y );

    // Vector z: buffer and increment.
    void*     z_buf     = bli_obj_buffer_at_off( *z );
    inc_t     z_inc     = bli_obj_vector_inc( *z );

    // Scalar rho: buffer only.
    void*     rho_buf   = bli_obj_buffer_at_off( *rho );

    num_t     type_alpha;
    void*     alpha_buf;

    FUNCPTR_T kernel_fp;

    // If alpha is a scalar constant, use the datatype of x to extract the
    // address of the corresponding constant value; otherwise, use the
    // datatype encoded within the alpha object and extract the buffer at
    // the alpha offset.
    bli_set_scalar_dt_buffer( alpha, type_x, type_alpha, alpha_buf );

    // Look up the wrapper for this (x,y,z) type combination.
    kernel_fp = ftypes[type_x][type_y][type_z];

    // Call it.
    kernel_fp( conj_xt,
               conj_x,
               conj_y,
               len,
               alpha_buf,
               x_buf, x_inc,
               y_buf, y_inc,
               rho_buf,
               z_buf, z_inc );
}
|
||||
|
||||
|
||||
#undef GENTFUNC3U12
|
||||
#define GENTFUNC3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, varname, kername ) \
|
||||
\
|
||||
void PASTEMAC3(chx,chy,chz,varname)( \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
void* alpha, \
|
||||
void* x, inc_t incx, \
|
||||
void* y, inc_t incy, \
|
||||
void* rho, \
|
||||
void* z, inc_t incz \
|
||||
) \
|
||||
{ \
|
||||
PASTEMAC3(chx,chy,chz,kername)( conjxt, \
|
||||
conjx, \
|
||||
conjy, \
|
||||
m, \
|
||||
alpha, \
|
||||
x, incx, \
|
||||
y, incy, \
|
||||
rho, \
|
||||
z, incz ); \
|
||||
}
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( dotaxpyv_kernel_void, DOTAXPYV_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( dotaxpyv_kernel_void, DOTAXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( dotaxpyv_kernel_void, DOTAXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
@@ -32,14 +32,18 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_dotaxpyv_unb_var1( obj_t* alpha,
|
||||
obj_t* xt,
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
obj_t* rho,
|
||||
obj_t* z );
|
||||
void bli_dotaxpyv_kernel( obj_t* alpha,
|
||||
obj_t* xt,
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
obj_t* rho,
|
||||
obj_t* z );
|
||||
|
||||
|
||||
//
|
||||
// Prototype the void pointer kernel wrappers.
|
||||
//
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, varname ) \
|
||||
\
|
||||
@@ -55,13 +59,13 @@ void PASTEMAC3(chx,chy,chz,varname)( \
|
||||
void* z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( dotaxpyv_unb_var1 )
|
||||
INSERT_GENTPROT3U12_BASIC( dotaxpyv_kernel_void )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( dotaxpyv_unb_var1 )
|
||||
INSERT_GENTPROT3U12_MIX_D( dotaxpyv_kernel_void )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( dotaxpyv_unb_var1 )
|
||||
INSERT_GENTPROT3U12_MIX_P( dotaxpyv_kernel_void )
|
||||
#endif
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
/*
|
||||
#define FUNCPTR_T dotaxpyv_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
@@ -51,17 +52,17 @@ typedef void (*FUNCPTR_T)(
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotaxpyv_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotaxpyv_ref);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotaxpyv_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotaxpyv_ref);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotaxpyv_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotaxpyv_ref);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
void bli_dotaxpyv_unb_var1( obj_t* alpha,
|
||||
void bli_dotaxpyv_ref( obj_t* alpha,
|
||||
obj_t* xt,
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
@@ -113,22 +114,24 @@ void bli_dotaxpyv_unb_var1( obj_t* alpha,
|
||||
buf_rho,
|
||||
buf_z, inc_z );
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
#undef GENTFUNC3U12
|
||||
#define GENTFUNC3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, varname, dotxvker, axpyvker ) \
|
||||
\
|
||||
void PASTEMAC3(chx,chy,chz,varname)( \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
void* alpha, \
|
||||
void* x, inc_t incx, \
|
||||
void* y, inc_t incy, \
|
||||
void* rho, \
|
||||
void* z, inc_t incz \
|
||||
) \
|
||||
void PASTEMAC3(chx,chy,chz,varname) \
|
||||
( \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
ctype_x* restrict alpha, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy, \
|
||||
ctype_xy* restrict rho, \
|
||||
ctype_z* restrict z, inc_t incz \
|
||||
) \
|
||||
{ \
|
||||
ctype_xy* one = PASTEMAC(chxy,1); \
|
||||
ctype_xy* zero = PASTEMAC(chxy,0); \
|
||||
@@ -155,13 +158,13 @@ void PASTEMAC3(chx,chy,chz,varname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC2( dotaxpyv_unb_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_BASIC2( dotaxpyv_ref, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D2( dotaxpyv_unb_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_D2( dotaxpyv_ref, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P2( dotaxpyv_unb_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_P2( dotaxpyv_ref, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
70
frame/1f/dotaxpyv/bli_dotaxpyv_ref.h
Normal file
70
frame/1f/dotaxpyv/bli_dotaxpyv_ref.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
void bli_dotaxpyv_ref( obj_t* alpha,
|
||||
obj_t* xt,
|
||||
obj_t* x,
|
||||
obj_t* y,
|
||||
obj_t* rho,
|
||||
obj_t* z );
|
||||
*/
|
||||
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_x, ctype_y, ctype_z, ctype_xy, chx, chy, chz, chxy, varname ) \
|
||||
\
|
||||
void PASTEMAC3(chx,chy,chz,varname) \
|
||||
( \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
ctype_x* restrict alpha, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict y, inc_t incy, \
|
||||
ctype_xy* restrict rho, \
|
||||
ctype_z* restrict z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( dotaxpyv_ref )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( dotaxpyv_ref )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( dotaxpyv_ref )
|
||||
#endif
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
#include "blis.h"
|
||||
|
||||
|
||||
/*
|
||||
//
|
||||
// Define object-based interface.
|
||||
//
|
||||
@@ -66,8 +65,7 @@ void PASTEMAC0(opname)( \
|
||||
z ); \
|
||||
}
|
||||
|
||||
GENFRONT( dotxaxpyf, DOTXAXPYF_KERNEL )
|
||||
*/
|
||||
GENFRONT( dotxaxpyf, dotxaxpyf_kernel )
|
||||
|
||||
|
||||
//
|
||||
|
||||
@@ -34,8 +34,9 @@
|
||||
|
||||
#include "bli_dotxaxpyf_check.h"
|
||||
#include "bli_dotxaxpyf_fusefac.h"
|
||||
#include "bli_dotxaxpyf_unb_var1.h"
|
||||
#include "bli_dotxaxpyf_unb_var2.h"
|
||||
#include "bli_dotxaxpyf_kernel.h"
|
||||
#include "bli_dotxaxpyf_ref_var1.h"
|
||||
#include "bli_dotxaxpyf_ref_var2.h"
|
||||
|
||||
|
||||
//
|
||||
|
||||
188
frame/1f/dotxaxpyf/bli_dotxaxpyf_kernel.c
Normal file
188
frame/1f/dotxaxpyf/bli_dotxaxpyf_kernel.c
Normal file
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#define FUNCPTR_T dotxaxpyf_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
conj_t conjat,
|
||||
conj_t conja,
|
||||
conj_t conjw,
|
||||
conj_t conjx,
|
||||
dim_t m,
|
||||
dim_t b_n,
|
||||
void* alpha,
|
||||
void* a, inc_t inca, inc_t lda,
|
||||
void* w, inc_t incw,
|
||||
void* x, inc_t incx,
|
||||
void* beta,
|
||||
void* y, inc_t incy,
|
||||
void* z, inc_t incz
|
||||
);
|
||||
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxaxpyf_kernel_void);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxaxpyf_kernel_void);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxaxpyf_kernel_void);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
//
// Object-based front end for the dotxaxpyf kernel. Pulls the conjugation
// flags, dimensions, strides and buffers out of the operand objects, selects
// the wrapper matching the datatypes of a, x and y, and calls it.
//
// at contributes only its conjugation status; the matrix buffer and strides
// are read from a.
//
void bli_dotxaxpyf_kernel( obj_t*  alpha,
                           obj_t*  at,
                           obj_t*  a,
                           obj_t*  w,
                           obj_t*  x,
                           obj_t*  beta,
                           obj_t*  y,
                           obj_t*  z )
{
	// Conjugation flags.
	conj_t  conj_at    = bli_obj_conj_status( *at );
	conj_t  conj_a     = bli_obj_conj_status( *a );
	conj_t  conj_w     = bli_obj_conj_status( *w );
	conj_t  conj_x     = bli_obj_conj_status( *x );

	// Datatypes used to pick the type-specific wrapper.
	num_t   type_a     = bli_obj_datatype( *a );
	num_t   type_x     = bli_obj_datatype( *x );
	num_t   type_y     = bli_obj_datatype( *y );

	// m is the vector dimension of z, b_n the vector dimension of y.
	dim_t   len_m      = bli_obj_vector_dim( *z );
	dim_t   len_bn     = bli_obj_vector_dim( *y );

	// Matrix buffer with its row and column strides.
	void*   a_ptr      = bli_obj_buffer_at_off( *a );
	inc_t   a_rs       = bli_obj_row_stride( *a );
	inc_t   a_cs       = bli_obj_col_stride( *a );

	// Vector buffers (at their offsets) and strides.
	void*   w_ptr      = bli_obj_buffer_at_off( *w );
	inc_t   w_inc      = bli_obj_vector_inc( *w );

	void*   x_ptr      = bli_obj_buffer_at_off( *x );
	inc_t   x_inc      = bli_obj_vector_inc( *x );

	void*   y_ptr      = bli_obj_buffer_at_off( *y );
	inc_t   y_inc      = bli_obj_vector_inc( *y );

	void*   z_ptr      = bli_obj_buffer_at_off( *z );
	inc_t   z_inc      = bli_obj_vector_inc( *z );

	// The datatype of alpha MUST be the type union of a and x, so that no
	// information is lost unnecessarily during computation.
	num_t   type_alpha = bli_datatype_union( type_a, type_x );
	void*   alpha_ptr  = bli_obj_buffer_for_1x1( type_alpha, *alpha );

	// The datatype of beta MUST be the same as the datatype of y.
	void*   beta_ptr   = bli_obj_buffer_for_1x1( type_y, *beta );

	// Fetch the wrapper for this type combination and call it directly.
	ftypes[type_a][type_x][type_y]( conj_at,
	                                conj_a,
	                                conj_w,
	                                conj_x,
	                                len_m,
	                                len_bn,
	                                alpha_ptr,
	                                a_ptr, a_rs, a_cs,
	                                w_ptr, w_inc,
	                                x_ptr, x_inc,
	                                beta_ptr,
	                                y_ptr, y_inc,
	                                z_ptr, z_inc );
}
|
||||
|
||||
|
||||
#undef GENTFUNC3U12
|
||||
#define GENTFUNC3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname, kername ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chb,chc,varname)( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
void* alpha, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* w, inc_t incw, \
|
||||
void* x, inc_t incx, \
|
||||
void* beta, \
|
||||
void* y, inc_t incy, \
|
||||
void* z, inc_t incz \
|
||||
) \
|
||||
{ \
|
||||
PASTEMAC3(cha,chb,chc,kername)( conjat, \
|
||||
conja, \
|
||||
conjw, \
|
||||
conjx, \
|
||||
m, \
|
||||
b_n, \
|
||||
alpha, \
|
||||
a, inca, lda, \
|
||||
w, incw, \
|
||||
x, incx, \
|
||||
beta, \
|
||||
y, incy, \
|
||||
z, incz ); \
|
||||
}
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( dotxaxpyf_kernel_void, DOTXAXPYF_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( dotxaxpyf_kernel_void, DOTXAXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( dotxaxpyf_kernel_void, DOTXAXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
@@ -32,16 +32,20 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_dotxaxpyf_unb_var2( obj_t* alpha,
|
||||
obj_t* at,
|
||||
obj_t* a,
|
||||
obj_t* w,
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y,
|
||||
obj_t* z );
|
||||
void bli_dotxaxpyf_kernel( obj_t* alpha,
|
||||
obj_t* at,
|
||||
obj_t* a,
|
||||
obj_t* w,
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y,
|
||||
obj_t* z );
|
||||
|
||||
|
||||
//
|
||||
// Prototype the void pointer kernel wrappers.
|
||||
//
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname ) \
|
||||
\
|
||||
@@ -61,13 +65,13 @@ void PASTEMAC3(cha,chb,chc,varname)( \
|
||||
void* z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( dotxaxpyf_unb_var2 )
|
||||
INSERT_GENTPROT3U12_BASIC( dotxaxpyf_kernel_void )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxaxpyf_unb_var2 )
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxaxpyf_kernel_void )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxaxpyf_unb_var2 )
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxaxpyf_kernel_void )
|
||||
#endif
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
/*
|
||||
#define FUNCPTR_T dotxaxpyf_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
@@ -55,17 +56,17 @@ typedef void (*FUNCPTR_T)(
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxaxpyf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxaxpyf_ref_var1);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxaxpyf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxaxpyf_ref_var1);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxaxpyf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxaxpyf_ref_var1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
void bli_dotxaxpyf_unb_var1( obj_t* alpha,
|
||||
void bli_dotxaxpyf_ref_var1( obj_t* alpha,
|
||||
obj_t* at,
|
||||
obj_t* a,
|
||||
obj_t* w,
|
||||
@@ -138,26 +139,28 @@ void bli_dotxaxpyf_unb_var1( obj_t* alpha,
|
||||
buf_y, inc_y,
|
||||
buf_z, inc_z );
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
#undef GENTFUNC3U12
|
||||
#define GENTFUNC3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname, dotxvker, axpyvker ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chb,chc,varname)( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
void* alpha, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* w, inc_t incw, \
|
||||
void* x, inc_t incx, \
|
||||
void* beta, \
|
||||
void* y, inc_t incy, \
|
||||
void* z, inc_t incz \
|
||||
) \
|
||||
void PASTEMAC3(cha,chb,chc,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ab* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_b* restrict w, inc_t incw, \
|
||||
ctype_b* restrict x, inc_t incx, \
|
||||
ctype_c* restrict beta, \
|
||||
ctype_c* restrict y, inc_t incy, \
|
||||
ctype_c* restrict z, inc_t incz \
|
||||
) \
|
||||
{ \
|
||||
ctype_ab* alpha_cast = alpha; \
|
||||
ctype_a* a_cast = a; \
|
||||
@@ -212,13 +215,13 @@ void PASTEMAC3(cha,chb,chc,varname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC2( dotxaxpyf_unb_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_BASIC2( dotxaxpyf_ref_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D2( dotxaxpyf_unb_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_D2( dotxaxpyf_ref_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P2( dotxaxpyf_unb_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_P2( dotxaxpyf_ref_var1, DOTXV_KERNEL, AXPYV_KERNEL )
|
||||
#endif
|
||||
|
||||
@@ -32,7 +32,8 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_dotxaxpyf_unb_var1( obj_t* alpha,
|
||||
/*
|
||||
void bli_dotxaxpyf_ref_var1( obj_t* alpha,
|
||||
obj_t* at,
|
||||
obj_t* a,
|
||||
obj_t* w,
|
||||
@@ -40,34 +41,36 @@ void bli_dotxaxpyf_unb_var1( obj_t* alpha,
|
||||
obj_t* beta,
|
||||
obj_t* y,
|
||||
obj_t* z );
|
||||
*/
|
||||
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chb,chc,varname)( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
void* alpha, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* w, inc_t incw, \
|
||||
void* x, inc_t incx, \
|
||||
void* beta, \
|
||||
void* y, inc_t incy, \
|
||||
void* z, inc_t incz \
|
||||
);
|
||||
void PASTEMAC3(cha,chb,chc,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ab* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_b* restrict w, inc_t incw, \
|
||||
ctype_b* restrict x, inc_t incx, \
|
||||
ctype_c* restrict beta, \
|
||||
ctype_c* restrict y, inc_t incy, \
|
||||
ctype_c* restrict z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( dotxaxpyf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_BASIC( dotxaxpyf_ref_var1 )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxaxpyf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxaxpyf_ref_var1 )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxaxpyf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxaxpyf_ref_var1 )
|
||||
#endif
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
/*
|
||||
#define FUNCPTR_T dotxaxpyf_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
@@ -55,17 +56,17 @@ typedef void (*FUNCPTR_T)(
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxaxpyf_unb_var2);
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxaxpyf_ref_var2);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxaxpyf_unb_var2);
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxaxpyf_ref_var2);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxaxpyf_unb_var2);
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxaxpyf_ref_var2);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
void bli_dotxaxpyf_unb_var2( obj_t* alpha,
|
||||
void bli_dotxaxpyf_ref_var2( obj_t* alpha,
|
||||
obj_t* at,
|
||||
obj_t* a,
|
||||
obj_t* w,
|
||||
@@ -138,26 +139,28 @@ void bli_dotxaxpyf_unb_var2( obj_t* alpha,
|
||||
buf_y, inc_y,
|
||||
buf_z, inc_z );
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
#undef GENTFUNC3U12
|
||||
#define GENTFUNC3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname, dotxfker, axpyfker ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chb,chc,varname)( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
void* alpha, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* w, inc_t incw, \
|
||||
void* x, inc_t incx, \
|
||||
void* beta, \
|
||||
void* y, inc_t incy, \
|
||||
void* z, inc_t incz \
|
||||
) \
|
||||
void PASTEMAC3(cha,chb,chc,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ab* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_b* restrict w, inc_t incw, \
|
||||
ctype_b* restrict x, inc_t incx, \
|
||||
ctype_c* restrict beta, \
|
||||
ctype_c* restrict y, inc_t incy, \
|
||||
ctype_c* restrict z, inc_t incz \
|
||||
) \
|
||||
{ \
|
||||
ctype_ab* alpha_cast = alpha; \
|
||||
ctype_a* a_cast = a; \
|
||||
@@ -193,13 +196,13 @@ void PASTEMAC3(cha,chb,chc,varname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC2( dotxaxpyf_unb_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
INSERT_GENTFUNC3U12_BASIC2( dotxaxpyf_ref_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D2( dotxaxpyf_unb_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_D2( dotxaxpyf_ref_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P2( dotxaxpyf_unb_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_P2( dotxaxpyf_ref_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
74
frame/1f/dotxaxpyf/bli_dotxaxpyf_ref_var2.h
Normal file
74
frame/1f/dotxaxpyf/bli_dotxaxpyf_ref_var2.h
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
// Prototype of the object-based dotxaxpyf_ref_var2 interface.
// NOTE(review): in this change the object-based definition in the matching
// .c file appears to be wrapped in a comment -- confirm whether this
// declaration should still be exposed.
void bli_dotxaxpyf_ref_var2
     (
       obj_t*  alpha,
       obj_t*  at,
       obj_t*  a,
       obj_t*  w,
       obj_t*  x,
       obj_t*  beta,
       obj_t*  y,
       obj_t*  z
     );
|
||||
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chb,chc,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ab* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_b* restrict w, inc_t incw, \
|
||||
ctype_b* restrict x, inc_t incx, \
|
||||
ctype_c* restrict beta, \
|
||||
ctype_c* restrict y, inc_t incy, \
|
||||
ctype_c* restrict z, inc_t incz \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( dotxaxpyf_ref_var2 )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxaxpyf_ref_var2 )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxaxpyf_ref_var2 )
|
||||
#endif
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
#include "blis.h"
|
||||
|
||||
|
||||
/*
|
||||
//
|
||||
// Define object-based interface.
|
||||
//
|
||||
@@ -70,8 +69,7 @@ void PASTEMAC0(opname)( \
|
||||
y ); \
|
||||
}
|
||||
|
||||
GENFRONT( dotxf, DOTXF_KERNEL )
|
||||
*/
|
||||
GENFRONT( dotxf, dotxf_kernel )
|
||||
|
||||
|
||||
//
|
||||
|
||||
@@ -34,7 +34,8 @@
|
||||
|
||||
#include "bli_dotxf_check.h"
|
||||
#include "bli_dotxf_fusefac.h"
|
||||
#include "bli_dotxf_unb_var1.h"
|
||||
#include "bli_dotxf_kernel.h"
|
||||
#include "bli_dotxf_ref.h"
|
||||
|
||||
|
||||
//
|
||||
|
||||
161
frame/1f/dotxf/bli_dotxf_kernel.c
Normal file
161
frame/1f/dotxf/bli_dotxf_kernel.c
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#define FUNCPTR_T dotxf_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
conj_t conjat,
|
||||
conj_t conjx,
|
||||
dim_t m,
|
||||
dim_t b_n,
|
||||
void* alpha,
|
||||
void* a, inc_t inca, inc_t lda,
|
||||
void* x, inc_t incx,
|
||||
void* beta,
|
||||
void* y, inc_t incy
|
||||
);
|
||||
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxf_kernel_void);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxf_kernel_void);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxf_kernel_void);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
//
// Object-based front end for the dotxf kernel. Pulls the conjugation flags,
// dimensions, strides and buffers out of the operand objects, selects the
// wrapper matching the datatypes of a, x and y, and calls it.
//
void bli_dotxf_kernel( obj_t*  alpha,
                       obj_t*  a,
                       obj_t*  x,
                       obj_t*  beta,
                       obj_t*  y )
{
	// Conjugation flags; conjat is read from a.
	conj_t  conj_at    = bli_obj_conj_status( *a );
	conj_t  conj_x     = bli_obj_conj_status( *x );

	// Datatypes used to pick the type-specific wrapper.
	num_t   type_a     = bli_obj_datatype( *a );
	num_t   type_x     = bli_obj_datatype( *x );
	num_t   type_y     = bli_obj_datatype( *y );

	// m is the vector dimension of x, b_n the vector dimension of y.
	dim_t   len_m      = bli_obj_vector_dim( *x );
	dim_t   len_bn     = bli_obj_vector_dim( *y );

	// Matrix buffer with its row and column strides.
	void*   a_ptr      = bli_obj_buffer_at_off( *a );
	inc_t   a_rs       = bli_obj_row_stride( *a );
	inc_t   a_cs       = bli_obj_col_stride( *a );

	// Vector buffers (at their offsets) and strides.
	void*   x_ptr      = bli_obj_buffer_at_off( *x );
	inc_t   x_inc      = bli_obj_vector_inc( *x );

	void*   y_ptr      = bli_obj_buffer_at_off( *y );
	inc_t   y_inc      = bli_obj_vector_inc( *y );

	// The datatype of alpha MUST be the type union of a and x, so that no
	// information is lost unnecessarily during computation.
	num_t   type_alpha = bli_datatype_union( type_a, type_x );
	void*   alpha_ptr  = bli_obj_buffer_for_1x1( type_alpha, *alpha );

	// The datatype of beta MUST be the same as the datatype of y.
	void*   beta_ptr   = bli_obj_buffer_for_1x1( type_y, *beta );

	// Fetch the wrapper for this type combination and call it directly.
	ftypes[type_a][type_x][type_y]( conj_at,
	                                conj_x,
	                                len_m,
	                                len_bn,
	                                alpha_ptr,
	                                a_ptr, a_rs, a_cs,
	                                x_ptr, x_inc,
	                                beta_ptr,
	                                y_ptr, y_inc );
}
|
||||
|
||||
|
||||
// Template for the void-pointer kernel wrappers. Each generated function is
// named PASTEMAC3(cha,chx,chy,varname) and forwards all of its arguments,
// unchanged and in the same order, to PASTEMAC3(cha,chx,chy,kername).
#undef  GENTFUNC3U12
#define GENTFUNC3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname, kername ) \
\
void PASTEMAC3(cha,chx,chy,varname)( \
                                     conj_t conjat, conj_t conjx, \
                                     dim_t  m,      dim_t  b_n, \
                                     void*  alpha, \
                                     void*  a, inc_t inca, inc_t lda, \
                                     void*  x, inc_t incx, \
                                     void*  beta, \
                                     void*  y, inc_t incy \
                                   ) \
{ \
	PASTEMAC3(cha,chx,chy,kername)( conjat, conjx, \
	                                m, b_n, \
	                                alpha, \
	                                a, inca, lda, \
	                                x, incx, \
	                                beta, \
	                                y, incy ); \
}
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( dotxf_kernel_void, DOTXF_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( dotxf_kernel_void, DOTXF_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( dotxf_kernel_void, DOTXF_KERNEL )
|
||||
#endif
|
||||
|
||||
@@ -32,13 +32,17 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_dotxf_unb_var1( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y );
|
||||
void bli_dotxf_kernel( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y );
|
||||
|
||||
|
||||
//
|
||||
// Prototype the void pointer kernel wrappers.
|
||||
//
|
||||
|
||||
#undef GENTPROT3U12
|
||||
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \
|
||||
\
|
||||
@@ -54,13 +58,13 @@ void PASTEMAC3(cha,chx,chy,varname)( \
|
||||
void* y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( dotxf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_BASIC( dotxf_kernel_void )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxf_kernel_void )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxf_unb_var1 )
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxf_kernel_void )
|
||||
#endif
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
/*
|
||||
#define FUNCPTR_T dotxf_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
@@ -51,17 +52,17 @@ typedef void (*FUNCPTR_T)(
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxf_ref);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxf_ref);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxf_unb_var1);
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxf_ref);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
void bli_dotxf_unb_var1( obj_t* alpha,
|
||||
void bli_dotxf_ref( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
@@ -119,22 +120,24 @@ void bli_dotxf_unb_var1( obj_t* alpha,
|
||||
buf_beta,
|
||||
buf_y, inc_y );
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
#undef GENTFUNC3U12
|
||||
#define GENTFUNC3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname, kername ) \
|
||||
\
|
||||
void PASTEMAC3(cha,chx,chy,varname)( \
|
||||
conj_t conjat, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
void* alpha, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* x, inc_t incx, \
|
||||
void* beta, \
|
||||
void* y, inc_t incy \
|
||||
) \
|
||||
void PASTEMAC3(cha,chx,chy,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype_ax* restrict alpha, \
|
||||
ctype_a* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype_x* restrict x, inc_t incx, \
|
||||
ctype_y* restrict beta, \
|
||||
ctype_y* restrict y, inc_t incy \
|
||||
) \
|
||||
{ \
|
||||
ctype_ax* alpha_cast = alpha; \
|
||||
ctype_a* a_cast = a; \
|
||||
@@ -165,13 +168,13 @@ void PASTEMAC3(cha,chx,chy,varname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( dotxf_unb_var1, DOTXV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_BASIC( dotxf_ref, DOTXV_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( dotxf_unb_var1, DOTXV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_D( dotxf_ref, DOTXV_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( dotxf_unb_var1, DOTXV_KERNEL )
|
||||
INSERT_GENTFUNC3U12_MIX_P( dotxf_ref, DOTXV_KERNEL )
|
||||
#endif
|
||||
|
||||
69
frame/1f/dotxf/bli_dotxf_ref.h
Normal file
69
frame/1f/dotxf/bli_dotxf_ref.h
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
void bli_dotxf_ref( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y );
|
||||
*/
|
||||
|
||||
|
||||
// Prototype template for the typed dotxf reference functions. Each
// generated prototype is named PASTEMAC3(cha,chx,chy,varname); alpha is
// typed as ctype_ax, a as ctype_a, x as ctype_x, and both beta and y as
// ctype_y. All pointer parameters are restrict-qualified.
#undef  GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \
\
void PASTEMAC3(cha,chx,chy,varname) \
     ( \
       conj_t             conjat, \
       conj_t             conjx, \
       dim_t              m, \
       dim_t              b_n, \
       ctype_ax* restrict alpha, \
       ctype_a*  restrict a, inc_t inca, inc_t lda, \
       ctype_x*  restrict x, inc_t incx, \
       ctype_y*  restrict beta, \
       ctype_y*  restrict y, inc_t incy \
     );
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( dotxf_ref )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxf_ref )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxf_ref )
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user