mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Added new dotxaxpyf variant 2.
Details: - Added a new variant for dotxaxpyf that is based on dotxf and axpyf kernels. By default, this variant is not used by any other operation.
This commit is contained in:
@@ -35,6 +35,7 @@
|
||||
#include "bli_dotxaxpyf_check.h"
|
||||
#include "bli_dotxaxpyf_fusefac.h"
|
||||
#include "bli_dotxaxpyf_unb_var1.h"
|
||||
#include "bli_dotxaxpyf_unb_var2.h"
|
||||
|
||||
|
||||
//
|
||||
|
||||
205
frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c
Normal file
205
frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c
Normal file
@@ -0,0 +1,205 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2013, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#define FUNCPTR_T dotxaxpyf_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
conj_t conjat,
|
||||
conj_t conja,
|
||||
conj_t conjw,
|
||||
conj_t conjx,
|
||||
dim_t m,
|
||||
dim_t b_n,
|
||||
void* alpha,
|
||||
void* a, inc_t inca, inc_t lda,
|
||||
void* w, inc_t incw,
|
||||
void* x, inc_t incx,
|
||||
void* beta,
|
||||
void* y, inc_t incy,
|
||||
void* z, inc_t incz
|
||||
);
|
||||
|
||||
// If some mixed datatype functions will not be compiled, we initialize
|
||||
// the corresponding elements of the function array to NULL.
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxaxpyf_unb_var2);
|
||||
#else
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxaxpyf_unb_var2);
|
||||
#else
|
||||
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxaxpyf_unb_var2);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
//
// Object-based front end for variant 2 of dotxaxpyf.
//
// Pulls the conjugation flags, dimensions, buffers, and strides out of the
// operand objects, looks up the type-specific function in ftypes using the
// datatypes of a, x, and y, and calls it.
//
//   alpha  scalar applied in both kernel calls
//   at     supplies only the conjugation flag used for the dotxf step
//   a      supplies the matrix buffer, strides, and the axpyf conjugation
//   w, x   input vectors
//   beta   scalar applied to y
//   y, z   output vectors; their lengths provide b_n and m respectively
//
void bli_dotxaxpyf_unb_var2( obj_t* alpha,
                             obj_t* at,
                             obj_t* a,
                             obj_t* w,
                             obj_t* x,
                             obj_t* beta,
                             obj_t* y,
                             obj_t* z )
{
	// Datatypes that select the type-specific function.
	num_t     type_a    = bli_obj_datatype( *a );
	num_t     type_x    = bli_obj_datatype( *x );
	num_t     type_y    = bli_obj_datatype( *y );

	// Conjugation flags, one per operand that may be conjugated.
	conj_t    conj_at   = bli_obj_conj_status( *at );
	conj_t    conj_a    = bli_obj_conj_status( *a );
	conj_t    conj_w    = bli_obj_conj_status( *w );
	conj_t    conj_x    = bli_obj_conj_status( *x );

	// Problem dimensions are taken from the output vectors.
	dim_t     len_z     = bli_obj_vector_dim( *z );
	dim_t     len_y     = bli_obj_vector_dim( *y );

	// Matrix a: buffer plus row and column strides.
	void*     a_ptr     = bli_obj_buffer_at_off( *a );
	inc_t     a_rs      = bli_obj_row_stride( *a );
	inc_t     a_cs      = bli_obj_col_stride( *a );

	// Vectors: buffer plus increment for each.
	void*     w_ptr     = bli_obj_buffer_at_off( *w );
	inc_t     w_inc     = bli_obj_vector_inc( *w );

	void*     x_ptr     = bli_obj_buffer_at_off( *x );
	inc_t     x_inc     = bli_obj_vector_inc( *x );

	void*     y_ptr     = bli_obj_buffer_at_off( *y );
	inc_t     y_inc     = bli_obj_vector_inc( *y );

	void*     z_ptr     = bli_obj_buffer_at_off( *z );
	inc_t     z_inc     = bli_obj_vector_inc( *z );

	// The datatype of alpha MUST be the type union of a and x. This is to
	// prevent any unnecessary loss of information during computation.
	num_t     type_alpha = bli_datatype_union( type_a, type_x );
	void*     alpha_ptr  = bli_obj_scalar_buffer( type_alpha, *alpha );

	// The datatype of beta MUST be the same as the datatype of y.
	num_t     type_beta  = type_y;
	void*     beta_ptr   = bli_obj_scalar_buffer( type_beta, *beta );

	// Look up the function for this datatype combination and invoke it.
	FUNCPTR_T impl = ftypes[type_a][type_x][type_y];

	impl( conj_at,
	      conj_a,
	      conj_w,
	      conj_x,
	      len_z,
	      len_y,
	      alpha_ptr,
	      a_ptr, a_rs, a_cs,
	      w_ptr, w_inc,
	      x_ptr, x_inc,
	      beta_ptr,
	      y_ptr, y_inc,
	      z_ptr, z_inc );
}
|
||||
|
||||
|
||||
/* Template for the type-specific dotxaxpyf_unb_var2 functions. This variant
   performs the fused operation as two separate kernel calls: dotxfker
   followed by axpyfker. */
#undef  GENTFUNC3U12
#define GENTFUNC3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname, dotxfker, axpyfker ) \
\
void PASTEMAC3(cha,chb,chc,varname)( \
                                     conj_t conjat, \
                                     conj_t conja, \
                                     conj_t conjw, \
                                     conj_t conjx, \
                                     dim_t  m, \
                                     dim_t  b_n, \
                                     void*  alpha, \
                                     void*  a, inc_t inca, inc_t lda, \
                                     void*  w, inc_t incw, \
                                     void*  x, inc_t incx, \
                                     void*  beta, \
                                     void*  y, inc_t incy, \
                                     void*  z, inc_t incz \
                                   ) \
{ \
	/* Typed views of the untyped operand buffers. */ \
	ctype_ab* alpha_typed = alpha; \
	ctype_c*  beta_typed  = beta; \
	ctype_a*  a_typed     = a; \
	ctype_b*  w_typed     = w; \
	ctype_b*  x_typed     = x; \
	ctype_c*  y_typed     = y; \
	ctype_c*  z_typed     = z; \
\
	/* A is m x n; both kernels receive m and b_n as its dimensions. */ \
\
	/* First kernel: y = beta * y + alpha * A^T w; */ \
	PASTEMAC3(cha,chb,chc,dotxfker)( conjat, \
	                                 conjw, \
	                                 m, \
	                                 b_n, \
	                                 alpha_typed, \
	                                 a_typed, inca, lda, \
	                                 w_typed, incw, \
	                                 beta_typed, \
	                                 y_typed, incy ); \
\
	/* Second kernel: z = z + alpha * A x; */ \
	PASTEMAC3(cha,chb,chc,axpyfker)( conja, \
	                                 conjx, \
	                                 m, \
	                                 b_n, \
	                                 alpha_typed, \
	                                 a_typed, inca, lda, \
	                                 x_typed, incx, \
	                                 z_typed, incz ); \
}
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC2( dotxaxpyf_unb_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D2( dotxaxpyf_unb_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P2( dotxaxpyf_unb_var2, DOTXF_KERNEL, AXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
73
frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.h
Normal file
73
frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2013, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
// Object-based entry point for variant 2 of dotxaxpyf. Takes the two
// scalars (alpha, beta), the matrix operand in two forms (at, a), the input
// vectors (w, x), and the output vectors (y, z) as objects.
void bli_dotxaxpyf_unb_var2
     (
       obj_t* alpha,
       obj_t* at,
       obj_t* a,
       obj_t* w,
       obj_t* x,
       obj_t* beta,
       obj_t* y,
       obj_t* z
     );
|
||||
|
||||
|
||||
/* Prototype template for the type-specific dotxaxpyf_unb_var2 functions.
   All operands are passed as untyped buffers with their strides. */
#undef  GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname ) \
\
void PASTEMAC3(cha,chb,chc,varname) \
     ( \
       conj_t conjat, conj_t conja, \
       conj_t conjw,  conj_t conjx, \
       dim_t  m, \
       dim_t  b_n, \
       void*  alpha, \
       void*  a, inc_t inca, inc_t lda, \
       void*  w, inc_t incw, \
       void*  x, inc_t incx, \
       void*  beta, \
       void*  y, inc_t incy, \
       void*  z, inc_t incz \
     );
|
||||
|
||||
INSERT_GENTPROT3U12_BASIC( dotxaxpyf_unb_var2 )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_D( dotxaxpyf_unb_var2 )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTPROT3U12_MIX_P( dotxaxpyf_unb_var2 )
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user