mirror of
https://github.com/amd/blis.git
synced 2026-05-05 15:01:13 +00:00
conflicts merge for bli_kernel.h
Change-Id: I15d846bd34e11f86ebfd7ed091ff671a1f3366a0
This commit is contained in:
15
README.md
15
README.md
@@ -7,16 +7,17 @@ Introduction
|
|||||||
------------
|
------------
|
||||||
|
|
||||||
BLIS is a portable software framework for instantiating high-performance
|
BLIS is a portable software framework for instantiating high-performance
|
||||||
BLAS-like dense linear algebra libraries. The framework was designed to
|
BLAS-like dense linear algebra libraries. The framework was designed to isolate
|
||||||
isolate essential kernels of computation that, when optimized, immediately
|
essential kernels of computation that, when optimized, immediately enable
|
||||||
enable optimized implementations of most of its commonly used and
|
optimized implementations of most of its commonly used and computationally
|
||||||
computationally intensive operations. BLIS is written in [ISO
|
intensive operations. BLIS is written in [ISO
|
||||||
C99](http://en.wikipedia.org/wiki/C99) and available under a
|
C99](http://en.wikipedia.org/wiki/C99) and available under a
|
||||||
[new/modified/3-clause BSD
|
[new/modified/3-clause BSD
|
||||||
license](http://opensource.org/licenses/BSD-3-Clause). While BLIS exports a
|
license](http://opensource.org/licenses/BSD-3-Clause). While BLIS exports a
|
||||||
[new BLAS-like API](), it also includes a BLAS compatibility layer which gives
|
[new BLAS-like API](https://github.com/flame/blis/wiki/BLISAPIQuickReference),
|
||||||
application developers access to BLIS implementations via traditional [BLAS
|
it also includes a BLAS compatibility layer which gives application developers
|
||||||
routine calls](http://www.netlib.org/lapack/lug/node145.html).
|
access to BLIS implementations via traditional [BLAS routine
|
||||||
|
calls](http://www.netlib.org/lapack/lug/node145.html).
|
||||||
|
|
||||||
For a thorough presentation of our framework, please read our recently accepted
|
For a thorough presentation of our framework, please read our recently accepted
|
||||||
journal article, ["BLIS: A Framework for Rapidly Instantiating BLAS
|
journal article, ["BLIS: A Framework for Rapidly Instantiating BLAS
|
||||||
|
|||||||
@@ -125,6 +125,18 @@
|
|||||||
#define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS
|
#define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// zgemm micro-kernel
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
#define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4
|
||||||
|
#define BLIS_DEFAULT_MC_Z 72
|
||||||
|
#define BLIS_DEFAULT_KC_Z 256
|
||||||
|
#define BLIS_DEFAULT_NC_Z 4080
|
||||||
|
#define BLIS_DEFAULT_MR_Z 3
|
||||||
|
#define BLIS_DEFAULT_NR_Z 4
|
||||||
|
|
||||||
|
#define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS
|
||||||
|
#endif
|
||||||
|
|
||||||
// -- trsm-related --
|
// -- trsm-related --
|
||||||
|
|
||||||
|
|||||||
9
configure
vendored
9
configure
vendored
@@ -91,7 +91,7 @@ print_usage()
|
|||||||
echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
|
echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
|
||||||
echo " "
|
echo " "
|
||||||
echo " Enable threading in the library, using threading model"
|
echo " Enable threading in the library, using threading model"
|
||||||
echo " MODEL={omp,pthreads,no}. If MODEL=no or "
|
echo " MODEL={openmp,pthreads,no}. If MODEL=no or "
|
||||||
echo " --disable-threading is specified, threading will be"
|
echo " --disable-threading is specified, threading will be"
|
||||||
echo " disabled. The default is 'no'."
|
echo " disabled. The default is 'no'."
|
||||||
echo " "
|
echo " "
|
||||||
@@ -486,13 +486,18 @@ main()
|
|||||||
|
|
||||||
|
|
||||||
# Check the threading model flag.
|
# Check the threading model flag.
|
||||||
|
# NOTE: 'omp' is deprecated but still supported; 'openmp' is preferred.
|
||||||
enable_openmp='no'
|
enable_openmp='no'
|
||||||
enable_openmp_01=0
|
enable_openmp_01=0
|
||||||
enable_pthreads='no'
|
enable_pthreads='no'
|
||||||
enable_pthreads_01=0
|
enable_pthreads_01=0
|
||||||
if [ "x${threading_model}" = "xauto" ]; then
|
if [ "x${threading_model}" = "xauto" ]; then
|
||||||
echo "${script_name}: determining the threading model automatically."
|
echo "${script_name}: determining the threading model automatically."
|
||||||
elif [ "x${threading_model}" = "xomp" ]; then
|
elif [ "x${threading_model}" = "xopenmp" ] ||
|
||||||
|
[ "x${threading_model}" = "xomp" ]; then
|
||||||
echo "${script_name}: using OpenMP for threading."
|
echo "${script_name}: using OpenMP for threading."
|
||||||
enable_openmp='yes'
|
enable_openmp='yes'
|
||||||
enable_openmp_01=1
|
enable_openmp_01=1
|
||||||
|
|||||||
@@ -99,8 +99,8 @@ void bli_getsc_check
|
|||||||
|
|
||||||
// Check object datatypes.
|
// Check object datatypes.
|
||||||
|
|
||||||
e_val = bli_check_noninteger_object( chi );
|
//e_val = bli_check_noninteger_object( chi );
|
||||||
bli_check_error_code( e_val );
|
//bli_check_error_code( e_val );
|
||||||
|
|
||||||
// Check object dimensions.
|
// Check object dimensions.
|
||||||
|
|
||||||
@@ -125,8 +125,8 @@ void bli_setsc_check
|
|||||||
|
|
||||||
// Check object datatypes.
|
// Check object datatypes.
|
||||||
|
|
||||||
e_val = bli_check_floating_object( chi );
|
//e_val = bli_check_floating_object( chi );
|
||||||
bli_check_error_code( e_val );
|
//bli_check_error_code( e_val );
|
||||||
|
|
||||||
// Check object dimensions.
|
// Check object dimensions.
|
||||||
|
|
||||||
|
|||||||
@@ -198,8 +198,8 @@ void PASTEMAC0(opname) \
|
|||||||
if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; \
|
if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; \
|
||||||
else dt_use = dt_chi; \
|
else dt_use = dt_chi; \
|
||||||
\
|
\
|
||||||
/* Invoke the typed function. */ \
|
/* Invoke the typed function (with integer support). */ \
|
||||||
bli_call_ft_3 \
|
bli_call_ft_3i \
|
||||||
( \
|
( \
|
||||||
dt_use, \
|
dt_use, \
|
||||||
opname, \
|
opname, \
|
||||||
@@ -229,8 +229,8 @@ void PASTEMAC0(opname) \
|
|||||||
if ( bli_error_checking_is_enabled() ) \
|
if ( bli_error_checking_is_enabled() ) \
|
||||||
PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \
|
PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \
|
||||||
\
|
\
|
||||||
/* Invoke the typed function. */ \
|
/* Invoke the typed function (with integer support). */ \
|
||||||
bli_call_ft_3 \
|
bli_call_ft_3i \
|
||||||
( \
|
( \
|
||||||
dt_chi, \
|
dt_chi, \
|
||||||
opname, \
|
opname, \
|
||||||
|
|||||||
@@ -227,3 +227,25 @@ void PASTEMAC(ch,opname) \
|
|||||||
|
|
||||||
INSERT_GENTFUNCR_BASIC0( zipsc )
|
INSERT_GENTFUNCR_BASIC0( zipsc )
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Integer-typed getsc entry point: expands the PASTEMAC2(i,d,gets) scalar
// macro on the dereferenced arguments.
// NOTE(review): presumably this reads the integer scalar *chi and stores its
// real and imaginary parts, as doubles, into *zeta_r and *zeta_i -- confirm
// against the definition of the i/d "gets" macro.
void bli_igetsc
     (
       dim_t*  chi,
       double* zeta_r,
       double* zeta_i
     )
{
	PASTEMAC2(i,d,gets)( *chi, *zeta_r, *zeta_i );
}
|
||||||
|
|
||||||
|
// Integer-typed setsc entry point: expands the PASTEMAC2(d,i,sets) scalar
// macro with the two double components and the dereferenced integer *chi.
// NOTE(review): presumably this stores (zeta_r, zeta_i) into *chi, with the
// imaginary part having no effect on an integer destination -- confirm
// against the definition of the d/i "sets" macro.
void bli_isetsc
     (
       double  zeta_r,
       double  zeta_i,
       dim_t*  chi
     )
{
	PASTEMAC2(d,i,sets)( zeta_r, zeta_i, *chi );
}
|
||||||
|
|
||||||
|
|||||||
@@ -141,3 +141,19 @@ void PASTEMAC(ch,opname) \
|
|||||||
|
|
||||||
INSERT_GENTPROTR_BASIC( zipsc )
|
INSERT_GENTPROTR_BASIC( zipsc )
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
void bli_igetsc
|
||||||
|
(
|
||||||
|
dim_t* chi,
|
||||||
|
double* zeta_r,
|
||||||
|
double* zeta_i
|
||||||
|
);
|
||||||
|
|
||||||
|
void bli_isetsc
|
||||||
|
(
|
||||||
|
double zeta_r,
|
||||||
|
double zeta_i,
|
||||||
|
dim_t* chi
|
||||||
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -46,12 +46,14 @@
|
|||||||
#include "bli_l1v_tapi.h"
|
#include "bli_l1v_tapi.h"
|
||||||
|
|
||||||
// Pack-related
|
// Pack-related
|
||||||
#include "bli_packv.h"
|
// NOTE: packv and unpackv are temporarily disabled.
|
||||||
#include "bli_unpackv.h"
|
//#include "bli_packv.h"
|
||||||
|
//#include "bli_unpackv.h"
|
||||||
|
|
||||||
// Other
|
// Other
|
||||||
#include "bli_scalv_cntl.h"
|
// NOTE: scalv control tree code is temporarily disabled.
|
||||||
#include "bli_scalv_int.h"
|
//#include "bli_scalv_cntl.h"
|
||||||
|
//#include "bli_scalv_int.h"
|
||||||
|
|
||||||
// Reference kernel headers
|
// Reference kernel headers
|
||||||
#include "bli_l1v_ref.h"
|
#include "bli_l1v_ref.h"
|
||||||
|
|||||||
@@ -56,6 +56,21 @@ GENFRONT( subv )
|
|||||||
GENFRONT( swapv )
|
GENFRONT( swapv )
|
||||||
|
|
||||||
|
|
||||||
|
// Generator for <opname>_check( x, index ): the generated function forwards
// both objects, unchanged, to bli_l1v_xi_check(). Instantiated below for
// amaxv only.
#undef  GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       obj_t*  x, \
       obj_t*  index  \
     ) \
{ \
	bli_l1v_xi_check( x, index ); \
}

GENFRONT( amaxv )
|
||||||
|
|
||||||
|
|
||||||
#undef GENFRONT
|
#undef GENFRONT
|
||||||
#define GENFRONT( opname ) \
|
#define GENFRONT( opname ) \
|
||||||
\
|
\
|
||||||
@@ -481,3 +496,39 @@ void bli_l1v_ax_check
|
|||||||
bli_check_error_code( e_val );
|
bli_check_error_code( e_val );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bli_l1v_xi_check
|
||||||
|
(
|
||||||
|
obj_t* x,
|
||||||
|
obj_t* index
|
||||||
|
)
|
||||||
|
{
|
||||||
|
err_t e_val;
|
||||||
|
|
||||||
|
// Check object datatypes.
|
||||||
|
|
||||||
|
e_val = bli_check_floating_object( x );
|
||||||
|
bli_check_error_code( e_val );
|
||||||
|
|
||||||
|
e_val = bli_check_integer_object( index );
|
||||||
|
bli_check_error_code( e_val );
|
||||||
|
|
||||||
|
e_val = bli_check_nonconstant_object( index );
|
||||||
|
bli_check_error_code( e_val );
|
||||||
|
|
||||||
|
// Check object dimensions.
|
||||||
|
|
||||||
|
e_val = bli_check_vector_object( x );
|
||||||
|
bli_check_error_code( e_val );
|
||||||
|
|
||||||
|
e_val = bli_check_scalar_object( index );
|
||||||
|
bli_check_error_code( e_val );
|
||||||
|
|
||||||
|
// Check object buffers (for non-NULLness).
|
||||||
|
|
||||||
|
e_val = bli_check_object_buffer( x );
|
||||||
|
bli_check_error_code( e_val );
|
||||||
|
|
||||||
|
e_val = bli_check_object_buffer( index );
|
||||||
|
bli_check_error_code( e_val );
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ void PASTEMAC(opname,_check) \
|
|||||||
( \
|
( \
|
||||||
obj_t* x, \
|
obj_t* x, \
|
||||||
obj_t* y \
|
obj_t* y \
|
||||||
);
|
);
|
||||||
|
|
||||||
GENTPROT( addv )
|
GENTPROT( addv )
|
||||||
GENTPROT( copyv )
|
GENTPROT( copyv )
|
||||||
@@ -52,6 +52,18 @@ GENTPROT( subv )
|
|||||||
GENTPROT( swapv )
|
GENTPROT( swapv )
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTPROT
|
||||||
|
#define GENTPROT( opname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(opname,_check) \
|
||||||
|
( \
|
||||||
|
obj_t* x, \
|
||||||
|
obj_t* index \
|
||||||
|
);
|
||||||
|
|
||||||
|
GENTPROT( amaxv )
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
#define GENTPROT( opname ) \
|
#define GENTPROT( opname ) \
|
||||||
\
|
\
|
||||||
@@ -74,7 +86,7 @@ void PASTEMAC(opname,_check) \
|
|||||||
obj_t* alpha, \
|
obj_t* alpha, \
|
||||||
obj_t* x, \
|
obj_t* x, \
|
||||||
obj_t* y \
|
obj_t* y \
|
||||||
);
|
);
|
||||||
|
|
||||||
GENTPROT( axpyv )
|
GENTPROT( axpyv )
|
||||||
GENTPROT( scal2v )
|
GENTPROT( scal2v )
|
||||||
@@ -88,7 +100,7 @@ void PASTEMAC(opname,_check) \
|
|||||||
obj_t* x, \
|
obj_t* x, \
|
||||||
obj_t* y, \
|
obj_t* y, \
|
||||||
obj_t* rho \
|
obj_t* rho \
|
||||||
);
|
);
|
||||||
|
|
||||||
GENTPROT( dotv )
|
GENTPROT( dotv )
|
||||||
|
|
||||||
@@ -103,7 +115,7 @@ void PASTEMAC(opname,_check) \
|
|||||||
obj_t* y, \
|
obj_t* y, \
|
||||||
obj_t* beta, \
|
obj_t* beta, \
|
||||||
obj_t* rho \
|
obj_t* rho \
|
||||||
);
|
);
|
||||||
|
|
||||||
GENTPROT( dotxv )
|
GENTPROT( dotxv )
|
||||||
|
|
||||||
@@ -114,7 +126,7 @@ GENTPROT( dotxv )
|
|||||||
void PASTEMAC(opname,_check) \
|
void PASTEMAC(opname,_check) \
|
||||||
( \
|
( \
|
||||||
obj_t* x \
|
obj_t* x \
|
||||||
);
|
);
|
||||||
|
|
||||||
GENTPROT( invertv )
|
GENTPROT( invertv )
|
||||||
|
|
||||||
@@ -126,7 +138,7 @@ void PASTEMAC(opname,_check) \
|
|||||||
( \
|
( \
|
||||||
obj_t* alpha, \
|
obj_t* alpha, \
|
||||||
obj_t* x \
|
obj_t* x \
|
||||||
);
|
);
|
||||||
|
|
||||||
GENTPROT( scalv )
|
GENTPROT( scalv )
|
||||||
GENTPROT( setv )
|
GENTPROT( setv )
|
||||||
@@ -196,3 +208,9 @@ void bli_l1v_ax_check
|
|||||||
obj_t* x
|
obj_t* x
|
||||||
);
|
);
|
||||||
|
|
||||||
|
void bli_l1v_xi_check
|
||||||
|
(
|
||||||
|
obj_t* x,
|
||||||
|
obj_t* index
|
||||||
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
|
|||||||
}
|
}
|
||||||
|
|
||||||
GENFRONT( addv, BLIS_ADDV_KER )
|
GENFRONT( addv, BLIS_ADDV_KER )
|
||||||
|
GENFRONT( amaxv, BLIS_AMAXV_KER )
|
||||||
GENFRONT( copyv, BLIS_COPYV_KER )
|
GENFRONT( copyv, BLIS_COPYV_KER )
|
||||||
GENFRONT( dotv, BLIS_DOTV_KER )
|
GENFRONT( dotv, BLIS_DOTV_KER )
|
||||||
GENFRONT( dotxv, BLIS_DOTXV_KER )
|
GENFRONT( dotxv, BLIS_DOTXV_KER )
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ); \
|
|||||||
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx );
|
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx );
|
||||||
|
|
||||||
GENPROT( addv )
|
GENPROT( addv )
|
||||||
|
GENPROT( amaxv )
|
||||||
GENPROT( axpbyv )
|
GENPROT( axpbyv )
|
||||||
GENPROT( axpyv )
|
GENPROT( axpyv )
|
||||||
GENPROT( copyv )
|
GENPROT( copyv )
|
||||||
|
|||||||
@@ -58,6 +58,21 @@ INSERT_GENTDEF( addv )
|
|||||||
INSERT_GENTDEF( copyv )
|
INSERT_GENTDEF( copyv )
|
||||||
INSERT_GENTDEF( subv )
|
INSERT_GENTDEF( subv )
|
||||||
|
|
||||||
|
// amaxv
|
||||||
|
|
||||||
|
#undef GENTDEF
|
||||||
|
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||||
|
\
|
||||||
|
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||||
|
( \
|
||||||
|
dim_t n, \
|
||||||
|
ctype* restrict x, inc_t incx, \
|
||||||
|
dim_t* restrict index, \
|
||||||
|
cntx_t* cntx \
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT_GENTDEF( amaxv )
|
||||||
|
|
||||||
// axpbyv
|
// axpbyv
|
||||||
|
|
||||||
#undef GENTDEF
|
#undef GENTDEF
|
||||||
|
|||||||
@@ -54,6 +54,20 @@ INSERT_GENTPROT_BASIC( copyv_ker_name )
|
|||||||
INSERT_GENTPROT_BASIC( subv_ker_name )
|
INSERT_GENTPROT_BASIC( subv_ker_name )
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTPROT
|
||||||
|
#define GENTPROT( ctype, ch, opname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,opname) \
|
||||||
|
( \
|
||||||
|
dim_t n, \
|
||||||
|
ctype* restrict x, inc_t incx, \
|
||||||
|
dim_t* restrict index, \
|
||||||
|
cntx_t* cntx \
|
||||||
|
); \
|
||||||
|
|
||||||
|
INSERT_GENTPROT_BASIC( amaxv_ker_name )
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
#define GENTPROT( ctype, ch, opname ) \
|
#define GENTPROT( ctype, ch, opname ) \
|
||||||
\
|
\
|
||||||
|
|||||||
@@ -82,6 +82,44 @@ GENFRONT( copyv )
|
|||||||
GENFRONT( subv )
|
GENFRONT( subv )
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENFRONT
|
||||||
|
#define GENFRONT( opname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(opname,EX_SUF) \
|
||||||
|
( \
|
||||||
|
obj_t* x, \
|
||||||
|
obj_t* index \
|
||||||
|
BLIS_OAPI_CNTX_PARAM \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
BLIS_OAPI_CNTX_DECL \
|
||||||
|
\
|
||||||
|
num_t dt = bli_obj_datatype( *x ); \
|
||||||
|
\
|
||||||
|
dim_t n = bli_obj_vector_dim( *x ); \
|
||||||
|
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||||
|
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||||
|
\
|
||||||
|
void* buf_index = bli_obj_buffer_at_off( *index ); \
|
||||||
|
\
|
||||||
|
if ( bli_error_checking_is_enabled() ) \
|
||||||
|
PASTEMAC(opname,_check)( x, index ); \
|
||||||
|
\
|
||||||
|
/* Invoke the typed function. */ \
|
||||||
|
bli_call_ft_5 \
|
||||||
|
( \
|
||||||
|
dt, \
|
||||||
|
opname, \
|
||||||
|
n, \
|
||||||
|
buf_x, incx, \
|
||||||
|
buf_index, \
|
||||||
|
cntx \
|
||||||
|
); \
|
||||||
|
}
|
||||||
|
|
||||||
|
GENFRONT( amaxv )
|
||||||
|
|
||||||
|
|
||||||
#undef GENFRONT
|
#undef GENFRONT
|
||||||
#define GENFRONT( opname ) \
|
#define GENFRONT( opname ) \
|
||||||
\
|
\
|
||||||
|
|||||||
@@ -52,6 +52,19 @@ GENTPROT( copyv )
|
|||||||
GENTPROT( subv )
|
GENTPROT( subv )
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTPROT
|
||||||
|
#define GENTPROT( opname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(opname,EX_SUF) \
|
||||||
|
( \
|
||||||
|
obj_t* x, \
|
||||||
|
obj_t* index \
|
||||||
|
BLIS_OAPI_CNTX_PARAM \
|
||||||
|
);
|
||||||
|
|
||||||
|
GENTPROT( amaxv )
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
#define GENTPROT( opname ) \
|
#define GENTPROT( opname ) \
|
||||||
\
|
\
|
||||||
|
|||||||
@@ -74,6 +74,38 @@ INSERT_GENTFUNC_BASIC( copyv, BLIS_COPYV_KER )
|
|||||||
INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER )
|
INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER )
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTFUNC
|
||||||
|
#define GENTFUNC( ctype, ch, opname, kerid ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,opname) \
|
||||||
|
( \
|
||||||
|
dim_t n, \
|
||||||
|
ctype* x, inc_t incx, \
|
||||||
|
dim_t* index, \
|
||||||
|
cntx_t* cntx \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
const num_t dt = PASTEMAC(ch,type); \
|
||||||
|
cntx_t* cntx_p; \
|
||||||
|
\
|
||||||
|
bli_cntx_init_local_if( opname, cntx, cntx_p ); \
|
||||||
|
\
|
||||||
|
PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \
|
||||||
|
\
|
||||||
|
f \
|
||||||
|
( \
|
||||||
|
n, \
|
||||||
|
x, incx, \
|
||||||
|
index, \
|
||||||
|
cntx_p \
|
||||||
|
); \
|
||||||
|
\
|
||||||
|
bli_cntx_finalize_local_if( opname, cntx ); \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNC_BASIC( amaxv, BLIS_AMAXV_KER )
|
||||||
|
|
||||||
|
|
||||||
#undef GENTFUNC
|
#undef GENTFUNC
|
||||||
#define GENTFUNC( ctype, ch, opname, kerid ) \
|
#define GENTFUNC( ctype, ch, opname, kerid ) \
|
||||||
\
|
\
|
||||||
|
|||||||
@@ -40,6 +40,9 @@
|
|||||||
#undef addv_ker_name
|
#undef addv_ker_name
|
||||||
#define addv_ker_name addv
|
#define addv_ker_name addv
|
||||||
|
|
||||||
|
#undef amaxv_ker_name
|
||||||
|
#define amaxv_ker_name amaxv
|
||||||
|
|
||||||
#undef axpbyv_ker_name
|
#undef axpbyv_ker_name
|
||||||
#define axpbyv_ker_name axpbyv
|
#define axpbyv_ker_name axpbyv
|
||||||
|
|
||||||
|
|||||||
134
frame/1/kernels/bli_amaxv_ref.c
Normal file
134
frame/1/kernels/bli_amaxv_ref.c
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
BLIS
|
||||||
|
An object-based framework for developing high-performance BLAS-like
|
||||||
|
libraries.
|
||||||
|
|
||||||
|
Copyright (C) 2014, The University of Texas at Austin
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
- Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
- Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
- Neither the name of The University of Texas at Austin nor the names
|
||||||
|
of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "blis.h"
|
||||||
|
|
||||||
|
//
|
||||||
|
// Define BLAS-like interfaces with typed operands.
|
||||||
|
//
|
||||||
|
|
||||||
|
#undef GENTFUNCR
|
||||||
|
#define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname) \
|
||||||
|
( \
|
||||||
|
dim_t n, \
|
||||||
|
ctype* x, inc_t incx, \
|
||||||
|
dim_t* i_max, \
|
||||||
|
cntx_t* cntx \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
ctype_r* minus_one = PASTEMAC(chr,m1); \
|
||||||
|
dim_t* zero_i = PASTEMAC(i,0); \
|
||||||
|
\
|
||||||
|
ctype_r chi1_r; \
|
||||||
|
ctype_r chi1_i; \
|
||||||
|
ctype_r abs_chi1; \
|
||||||
|
ctype_r abs_chi1_max; \
|
||||||
|
dim_t i; \
|
||||||
|
\
|
||||||
|
/* Initialize the index of the maximum absolute value to zero. */ \
|
||||||
|
PASTEMAC(i,copys)( zero_i, *i_max ); \
|
||||||
|
\
|
||||||
|
/* If the vector length is zero, return early. This directly emulates
|
||||||
|
the behavior of netlib BLAS's i?amax() routines. */ \
|
||||||
|
if ( bli_zero_dim1( n ) ) return; \
|
||||||
|
\
|
||||||
|
/* Initialize the maximum absolute value search candidate with
|
||||||
|
-1, which is guaranteed to be less than all values we will
|
||||||
|
compute. */ \
|
||||||
|
PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \
|
||||||
|
\
|
||||||
|
if ( incx == 1 ) \
|
||||||
|
{ \
|
||||||
|
for ( i = 0; i < n; ++i ) \
|
||||||
|
{ \
|
||||||
|
/* Get the real and imaginary components of chi1. */ \
|
||||||
|
PASTEMAC2(ch,chr,gets)( x[i], chi1_r, chi1_i ); \
|
||||||
|
\
|
||||||
|
/* Replace chi1_r and chi1_i with their absolute values. */ \
|
||||||
|
PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
|
||||||
|
PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
|
||||||
|
\
|
||||||
|
/* Add the real and imaginary absolute values together. */ \
|
||||||
|
PASTEMAC(chr,set0s)( abs_chi1 ); \
|
||||||
|
PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
|
||||||
|
PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
|
||||||
|
\
|
||||||
|
/* If the absolute value of the current element exceeds that of
|
||||||
|
the previous largest, save it and its index. If NaN is
|
||||||
|
encountered, then treat it the same as if it were a valid
|
||||||
|
value that was smaller than any previously seen. This
|
||||||
|
behavior mimics that of LAPACK's ?lange(). */ \
|
||||||
|
if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \
|
||||||
|
{ \
|
||||||
|
abs_chi1_max = abs_chi1; \
|
||||||
|
*i_max = i; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
for ( i = 0; i < n; ++i ) \
|
||||||
|
{ \
|
||||||
|
ctype* chi1 = x + (i )*incx; \
|
||||||
|
\
|
||||||
|
/* Get the real and imaginary components of chi1. */ \
|
||||||
|
PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \
|
||||||
|
\
|
||||||
|
/* Replace chi1_r and chi1_i with their absolute values. */ \
|
||||||
|
PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
|
||||||
|
PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
|
||||||
|
\
|
||||||
|
/* Add the real and imaginary absolute values together. */ \
|
||||||
|
PASTEMAC(chr,set0s)( abs_chi1 ); \
|
||||||
|
PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
|
||||||
|
PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
|
||||||
|
\
|
||||||
|
/* If the absolute value of the current element exceeds that of
|
||||||
|
the previous largest, save it and its index. If NaN is
|
||||||
|
encountered, then treat it the same as if it were a valid
|
||||||
|
value that was smaller than any previously seen. This
|
||||||
|
behavior mimics that of LAPACK's ?lange(). */ \
|
||||||
|
if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \
|
||||||
|
{ \
|
||||||
|
abs_chi1_max = abs_chi1; \
|
||||||
|
*i_max = i; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
INSERT_GENTFUNCR_BASIC0( amaxv_ref )
|
||||||
|
|
||||||
@@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
|
#if 0
|
||||||
packv_t* packv_cntl = NULL;
|
packv_t* packv_cntl = NULL;
|
||||||
|
|
||||||
void bli_packv_cntl_init( void )
|
void bli_packv_cntl_init( void )
|
||||||
@@ -77,4 +78,41 @@ void bli_packv_cntl_obj_init( packv_t* cntl,
|
|||||||
cntl->bmid = bmid;
|
cntl->bmid = bmid;
|
||||||
cntl->pack_schema = pack_schema;
|
cntl->pack_schema = pack_schema;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cntl_t* bli_packv_cntl_obj_create
|
||||||
|
(
|
||||||
|
void* var_func,
|
||||||
|
void* packv_var_func,
|
||||||
|
bszid_t bmid,
|
||||||
|
pack_t pack_schema,
|
||||||
|
cntl_t* sub_node
|
||||||
|
)
|
||||||
|
{
|
||||||
|
cntl_t* cntl;
|
||||||
|
packv_params_t* params;
|
||||||
|
|
||||||
|
// Allocate a packv_params_t struct.
|
||||||
|
params = bli_malloc_intl( sizeof( packv_params_t ) );
|
||||||
|
|
||||||
|
// Initialize the packv_params_t struct.
|
||||||
|
params->size = sizeof( packv_params_t );
|
||||||
|
params->packv_var_func = packv_var_func;
|
||||||
|
params->bmid = bmid;
|
||||||
|
params->pack_schema = pack_schema;
|
||||||
|
|
||||||
|
// It's important that we set the bszid field to BLIS_NO_PART to indicate
|
||||||
|
// that no blocksize partitioning is performed. bli_cntl_free() will rely
|
||||||
|
// on this information to know how to step through the thrinfo_t tree in
|
||||||
|
// sync with the cntl_t tree.
|
||||||
|
cntl = bli_cntl_obj_create
|
||||||
|
(
|
||||||
|
BLIS_NO_PART,
|
||||||
|
var_func,
|
||||||
|
params,
|
||||||
|
sub_node
|
||||||
|
);
|
||||||
|
|
||||||
|
return cntl;
|
||||||
|
}
|
||||||
|
|
||||||
67
frame/1/other/packv/bli_packv_cntl.h
Normal file
67
frame/1/other/packv/bli_packv_cntl.h
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
BLIS
|
||||||
|
An object-based framework for developing high-performance BLAS-like
|
||||||
|
libraries.
|
||||||
|
|
||||||
|
Copyright (C) 2014, The University of Texas at Austin
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
- Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
- Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
- Neither the name of The University of Texas at Austin nor the names
|
||||||
|
of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct packv_params_s
|
||||||
|
{
|
||||||
|
uint64_t size
|
||||||
|
packv_voft* var_func;
|
||||||
|
bszid_t bmid;
|
||||||
|
pack_t pack_schema;
|
||||||
|
};
|
||||||
|
typedef struct packv_params_s packv_params_t;
|
||||||
|
|
||||||
|
|
||||||
|
// Reads the var_func field of the packv_params_t attached to cntl. The cast
// is applied to cntl->params first, and only then is the member selected.
#define bli_cntl_packv_params_var_func( cntl ) \
\
	( ( (packv_params_t*)(cntl)->params )->var_func )
|
||||||
|
|
||||||
|
// Reads the bmid field of the packv_params_t attached to cntl. The cast is
// applied to cntl->params first, and only then is the member selected.
#define bli_cntl_packv_params_bmid( cntl ) \
\
	( ( (packv_params_t*)(cntl)->params )->bmid )
|
||||||
|
|
||||||
|
// Reads the pack_schema field of the packv_params_t attached to cntl. The
// cast is applied to cntl->params first, and only then is the member
// selected.
#define bli_cntl_packv_params_pack_schema( cntl ) \
\
	( ( (packv_params_t*)(cntl)->params )->pack_schema )
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
cntl_t* bli_packv_cntl_obj_create
|
||||||
|
(
|
||||||
|
void* var_func,
|
||||||
|
void* packv_var_func,
|
||||||
|
bszid_t bmid,
|
||||||
|
pack_t pack_schema,
|
||||||
|
cntl_t* sub_node
|
||||||
|
);
|
||||||
|
|
||||||
@@ -52,7 +52,6 @@ void bli_packv_init
|
|||||||
|
|
||||||
pack_t pack_schema;
|
pack_t pack_schema;
|
||||||
bszid_t bmult_id;
|
bszid_t bmult_id;
|
||||||
obj_t c;
|
|
||||||
|
|
||||||
// Check parameters.
|
// Check parameters.
|
||||||
if ( bli_error_checking_is_enabled() )
|
if ( bli_error_checking_is_enabled() )
|
||||||
@@ -84,26 +83,6 @@ void bli_packv_init
|
|||||||
// left is whether we are to typecast vector a before packing.
|
// left is whether we are to typecast vector a before packing.
|
||||||
if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
|
if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
|
||||||
bli_abort();
|
bli_abort();
|
||||||
/*
|
|
||||||
{
|
|
||||||
// Initialize an object c for the intermediate typecast vector.
|
|
||||||
bli_packv_init_cast( a,
|
|
||||||
p,
|
|
||||||
&c );
|
|
||||||
|
|
||||||
// Copy/typecast vector a to vector c.
|
|
||||||
bli_copyv( a,
|
|
||||||
&c );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
// If no cast is needed, then aliasing object c to the original
|
|
||||||
// vector serves as a minor optimization. This causes the packv
|
|
||||||
// implementation to pack directly from vector a.
|
|
||||||
bli_obj_alias_to( *a, c );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Extract various fields from the control tree and pass them in
|
// Extract various fields from the control tree and pass them in
|
||||||
// explicitly into _init_pack(). This allows external code generators
|
// explicitly into _init_pack(). This allows external code generators
|
||||||
@@ -116,7 +95,7 @@ void bli_packv_init
|
|||||||
(
|
(
|
||||||
pack_schema,
|
pack_schema,
|
||||||
bmult_id,
|
bmult_id,
|
||||||
&c,
|
&a,
|
||||||
p,
|
p,
|
||||||
cntx
|
cntx
|
||||||
);
|
);
|
||||||
@@ -125,22 +104,24 @@ void bli_packv_init
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void bli_packv_init_pack
|
siz_t bli_packv_init_pack
|
||||||
(
|
(
|
||||||
pack_t pack_schema,
|
pack_t schema,
|
||||||
bszid_t bmult_id,
|
bszid_t bmult_id,
|
||||||
obj_t* c,
|
obj_t* a,
|
||||||
obj_t* p,
|
obj_t* p,
|
||||||
cntx_t* cntx
|
cntx_t* cntx
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
num_t dt = bli_obj_datatype( *c );
|
num_t dt = bli_obj_datatype( *a );
|
||||||
dim_t dim_c = bli_obj_vector_dim( *c );
|
dim_t dim_a = bli_obj_vector_dim( *a );
|
||||||
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
|
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
|
||||||
|
|
||||||
membrk_t* membrk = bli_cntx_membrk( cntx );
|
membrk_t* membrk = bli_cntx_membrk( cntx );
|
||||||
|
|
||||||
|
#if 0
|
||||||
mem_t* mem_p;
|
mem_t* mem_p;
|
||||||
|
#endif
|
||||||
dim_t m_p_pad;
|
dim_t m_p_pad;
|
||||||
siz_t size_p;
|
siz_t size_p;
|
||||||
inc_t rs_p, cs_p;
|
inc_t rs_p, cs_p;
|
||||||
@@ -148,21 +129,17 @@ void bli_packv_init_pack
|
|||||||
|
|
||||||
|
|
||||||
// We begin by copying the basic fields of c.
|
// We begin by copying the basic fields of a.
|
||||||
bli_obj_alias_to( *c, *p );
|
bli_obj_alias_to( *a, *p );
|
||||||
|
|
||||||
// Update the dimensions.
|
// Update the dimensions.
|
||||||
bli_obj_set_dims( dim_c, 1, *p );
|
bli_obj_set_dims( dim_a, 1, *p );
|
||||||
|
|
||||||
// Reset the view offsets to (0,0).
|
// Reset the view offsets to (0,0).
|
||||||
bli_obj_set_offs( 0, 0, *p );
|
bli_obj_set_offs( 0, 0, *p );
|
||||||
|
|
||||||
// Set the pack schema in the p object to the value in the control tree
|
// Set the pack schema in the p object to the value in the control tree
|
||||||
// node.
|
// node.
|
||||||
bli_obj_set_pack_schema( pack_schema, *p );
|
bli_obj_set_pack_schema( schema, *p );
|
||||||
|
|
||||||
// Extract the address of the mem_t object within p that will track
|
|
||||||
// properties of the packed buffer.
|
|
||||||
mem_p = bli_obj_pack_mem( *p );
|
|
||||||
|
|
||||||
// Compute the dimensions padded by the dimension multiples.
|
// Compute the dimensions padded by the dimension multiples.
|
||||||
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( *p ), bmult );
|
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( *p ), bmult );
|
||||||
@@ -170,6 +147,11 @@ void bli_packv_init_pack
|
|||||||
// Compute the size of the packed buffer.
|
// Compute the size of the packed buffer.
|
||||||
size_p = m_p_pad * 1 * bli_obj_elem_size( *p );
|
size_p = m_p_pad * 1 * bli_obj_elem_size( *p );
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// Extract the address of the mem_t object within p that will track
|
||||||
|
// properties of the packed buffer.
|
||||||
|
mem_p = bli_obj_pack_mem( *p );
|
||||||
|
|
||||||
if ( bli_mem_is_unalloc( mem_p ) )
|
if ( bli_mem_is_unalloc( mem_p ) )
|
||||||
{
|
{
|
||||||
// If the mem_t object of p has not yet been allocated, then acquire
|
// If the mem_t object of p has not yet been allocated, then acquire
|
||||||
@@ -192,19 +174,19 @@ void bli_packv_init_pack
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save the padded (packed) dimensions into the packed object.
|
|
||||||
bli_obj_set_padded_dims( m_p_pad, 1, *p );
|
|
||||||
|
|
||||||
// Grab the buffer address from the mem_t object and copy it to the
|
// Grab the buffer address from the mem_t object and copy it to the
|
||||||
// main object buffer field. (Sometimes this buffer address will be
|
// main object buffer field. (Sometimes this buffer address will be
|
||||||
// copied when the value is already up-to-date, because it persists
|
// copied when the value is already up-to-date, because it persists
|
||||||
// in the main object buffer field across loop iterations.)
|
// in the main object buffer field across loop iterations.)
|
||||||
buf = bli_mem_buffer( mem_p );
|
buf = bli_mem_buffer( mem_p );
|
||||||
bli_obj_set_buffer( buf, *p );
|
bli_obj_set_buffer( buf, *p );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Save the padded (packed) dimensions into the packed object.
|
||||||
|
bli_obj_set_padded_dims( m_p_pad, 1, *p );
|
||||||
|
|
||||||
// Set the row and column strides of p based on the pack schema.
|
// Set the row and column strides of p based on the pack schema.
|
||||||
if ( pack_schema == BLIS_PACKED_VECTOR )
|
if ( schema == BLIS_PACKED_VECTOR )
|
||||||
{
|
{
|
||||||
// Set the strides to reflect a column-stored vector. Note that the
|
// Set the strides to reflect a column-stored vector. Note that the
|
||||||
// column stride may never be used, and is only useful to determine
|
// column stride may never be used, and is only useful to determine
|
||||||
@@ -215,8 +197,11 @@ void bli_packv_init_pack
|
|||||||
|
|
||||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return size_p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
void bli_packv_release
|
void bli_packv_release
|
||||||
(
|
(
|
||||||
obj_t* p,
|
obj_t* p,
|
||||||
@@ -226,52 +211,4 @@ void bli_packv_release
|
|||||||
if ( !bli_cntl_is_noop( cntl ) )
|
if ( !bli_cntl_is_noop( cntl ) )
|
||||||
bli_obj_release_pack( p );
|
bli_obj_release_pack( p );
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
void bli_packv_init_cast( obj_t* a,
|
|
||||||
obj_t* p,
|
|
||||||
obj_t* c )
|
|
||||||
{
|
|
||||||
// The idea here is that we want to create an object c that is identical
|
|
||||||
// to object a, except that:
|
|
||||||
// (1) the storage datatype of c is equal to the target datatype of a,
|
|
||||||
// with the element size of c adjusted accordingly,
|
|
||||||
// (2) object c is marked as being stored in a standard, contiguous
|
|
||||||
// format (ie: a column vector),
|
|
||||||
// (3) the view offset of c is reset to (0,0), and
|
|
||||||
// (4) object c's main buffer is set to a new memory region acquired
|
|
||||||
// from the memory manager, or extracted from p if a mem entry is
|
|
||||||
// already available. (After acquring a mem entry from the memory
|
|
||||||
// manager, it is cached within p for quick access later on.)
|
|
||||||
|
|
||||||
num_t dt_targ_a = bli_obj_target_datatype( *a );
|
|
||||||
dim_t dim_a = bli_obj_vector_dim( *a );
|
|
||||||
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
|
|
||||||
|
|
||||||
// We begin by copying the basic fields of a.
|
|
||||||
bli_obj_alias_to( *a, *c );
|
|
||||||
|
|
||||||
// Update datatype and element size fields.
|
|
||||||
bli_obj_set_datatype( dt_targ_a, *c );
|
|
||||||
bli_obj_set_elem_size( elem_size_c, *c );
|
|
||||||
|
|
||||||
// Update the dimensions.
|
|
||||||
bli_obj_set_dims( dim_a, 1, *c );
|
|
||||||
|
|
||||||
// Reset the view offsets to (0,0).
|
|
||||||
bli_obj_set_offs( 0, 0, *c );
|
|
||||||
|
|
||||||
// Check the mem_t entry of p associated with the cast buffer. If it is
|
|
||||||
// NULL, then acquire memory sufficient to hold the object data and cache
|
|
||||||
// it to p. (Otherwise, if it is non-NULL, then memory has already been
|
|
||||||
// acquired from the memory manager and cached.) We then set the main
|
|
||||||
// buffer of c to the cached address of the cast memory.
|
|
||||||
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
|
|
||||||
|
|
||||||
// Update the strides. We set the increments to reflect a column storage.
|
|
||||||
// Note that the column stride should never be used.
|
|
||||||
bli_obj_set_strides( 1, dim_a, *c );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
@@ -40,23 +40,12 @@ void bli_packv_init
|
|||||||
packv_t* cntl
|
packv_t* cntl
|
||||||
);
|
);
|
||||||
|
|
||||||
void bli_packv_init_pack
|
siz_t bli_packv_init_pack
|
||||||
(
|
(
|
||||||
pack_t pack_schema,
|
pack_t pack_schema,
|
||||||
bszid_t bmult_id,
|
bszid_t bmult_id,
|
||||||
obj_t* c,
|
obj_t* a,
|
||||||
obj_t* p,
|
obj_t* p,
|
||||||
cntx_t* cntx
|
cntx_t* cntx
|
||||||
);
|
);
|
||||||
|
|
||||||
void bli_packv_release
|
|
||||||
(
|
|
||||||
obj_t* p,
|
|
||||||
packv_t* cntl
|
|
||||||
);
|
|
||||||
|
|
||||||
/*
|
|
||||||
void bli_packv_init_cast( obj_t* a,
|
|
||||||
obj_t* p,
|
|
||||||
obj_t* c );
|
|
||||||
*/
|
|
||||||
@@ -47,27 +47,23 @@ static FUNCPTR_T vars[1][3] =
|
|||||||
{ bli_packv_unb_var1, NULL, NULL }
|
{ bli_packv_unb_var1, NULL, NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
void bli_packv_int( obj_t* a,
|
void bli_packv_int
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* a,
|
||||||
packv_t* cntl )
|
obj_t* p,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl
|
||||||
|
)
|
||||||
{
|
{
|
||||||
// The packv operation consists of an optional typecasting pre-process.
|
#if 0
|
||||||
// Here are the following possible ways packv can execute:
|
|
||||||
// 1. cast and pack: When typecasting and packing are both
|
|
||||||
// precribed, typecast a to temporary vector c and then pack
|
|
||||||
// c to p.
|
|
||||||
// 2. pack only: Typecasting is skipped when it is not needed;
|
|
||||||
// simply pack a directly to p.
|
|
||||||
// 3. cast only: Not yet supported / not used.
|
|
||||||
// 4. no-op: The control tree sometimes directs us to skip the
|
|
||||||
// pack operation entirely. Alias p to a and return.
|
|
||||||
|
|
||||||
//obj_t c;
|
|
||||||
|
|
||||||
varnum_t n;
|
varnum_t n;
|
||||||
impl_t i;
|
impl_t i;
|
||||||
FUNCPTR_T f;
|
#endif
|
||||||
|
packv_voft f;
|
||||||
|
|
||||||
|
// !!!
|
||||||
|
// TODO: define the packv_voft type used for the declaration of f above.
|
||||||
|
// !!!
|
||||||
|
|
||||||
// Check parameters.
|
// Check parameters.
|
||||||
if ( bli_error_checking_is_enabled() )
|
if ( bli_error_checking_is_enabled() )
|
||||||
@@ -36,6 +36,7 @@
|
|||||||
#include "bli_l1m_check.h"
|
#include "bli_l1m_check.h"
|
||||||
|
|
||||||
#include "bli_l1m_ft.h"
|
#include "bli_l1m_ft.h"
|
||||||
|
#include "bli_l1m_voft.h"
|
||||||
|
|
||||||
// Prototype object APIs with and without contexts.
|
// Prototype object APIs with and without contexts.
|
||||||
#include "bli_oapi_w_cntx.h"
|
#include "bli_oapi_w_cntx.h"
|
||||||
@@ -51,6 +52,5 @@
|
|||||||
#include "bli_unpackm.h"
|
#include "bli_unpackm.h"
|
||||||
|
|
||||||
// Other
|
// Other
|
||||||
#include "bli_scalm_cntl.h"
|
#include "bli_scalm.h"
|
||||||
#include "bli_scalm_int.h"
|
|
||||||
|
|
||||||
|
|||||||
75
frame/1m/bli_l1m_voft.h
Normal file
75
frame/1m/bli_l1m_voft.h
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
BLIS
|
||||||
|
An object-based framework for developing high-performance BLAS-like
|
||||||
|
libraries.
|
||||||
|
|
||||||
|
Copyright (C) 2014, The University of Texas at Austin
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
- Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
- Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
- Neither the name of The University of Texas at Austin nor the names
|
||||||
|
of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BLIS_L1M_VAR_OFT_H
|
||||||
|
#define BLIS_L1M_VAR_OFT_H
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// -- Level-1m variant function types ------------------------------------------
|
||||||
|
//
|
||||||
|
|
||||||
|
#undef GENTDEF
|
||||||
|
#define GENTDEF( opname ) \
|
||||||
|
\
|
||||||
|
typedef void (*PASTECH(opname,_voft)) \
|
||||||
|
( \
|
||||||
|
obj_t* a, \
|
||||||
|
obj_t* p, \
|
||||||
|
cntx_t* cntx, \
|
||||||
|
cntl_t* cntl, \
|
||||||
|
thrinfo_t* thread \
|
||||||
|
);
|
||||||
|
|
||||||
|
GENTDEF( packm )
|
||||||
|
|
||||||
|
|
||||||
|
#undef GENTDEF
|
||||||
|
#define GENTDEF( opname ) \
|
||||||
|
\
|
||||||
|
typedef void (*PASTECH(opname,_voft)) \
|
||||||
|
( \
|
||||||
|
obj_t* p, \
|
||||||
|
obj_t* a, \
|
||||||
|
cntx_t* cntx, \
|
||||||
|
cntl_t* cntl, \
|
||||||
|
thrinfo_t* thread \
|
||||||
|
);
|
||||||
|
|
||||||
|
GENTDEF( unpackm )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
@@ -93,10 +93,14 @@ static func_t packm_struc_cxk_kers[BLIS_NUM_PACK_SCHEMA_TYPES] =
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
void bli_packm_blk_var1( obj_t* c,
|
void bli_packm_blk_var1
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* c,
|
||||||
thrinfo_t* t )
|
obj_t* p,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* t
|
||||||
|
)
|
||||||
{
|
{
|
||||||
num_t dt_cp = bli_obj_datatype( *c );
|
num_t dt_cp = bli_obj_datatype( *c );
|
||||||
|
|
||||||
@@ -140,7 +144,7 @@ void bli_packm_blk_var1( obj_t* c,
|
|||||||
// whether we are executing an induced method.
|
// whether we are executing an induced method.
|
||||||
if ( bli_is_nat_packed( schema ) )
|
if ( bli_is_nat_packed( schema ) )
|
||||||
{
|
{
|
||||||
// This branch if for native execution, where we assume that
|
// This branch is for native execution, where we assume that
|
||||||
// the micro-kernel will always apply the alpha scalar of the
|
// the micro-kernel will always apply the alpha scalar of the
|
||||||
// higher-level operation. Thus, we use BLIS_ONE for kappa so
|
// higher-level operation. Thus, we use BLIS_ONE for kappa so
|
||||||
// that the underlying packm implementation does not perform
|
// that the underlying packm implementation does not perform
|
||||||
@@ -156,28 +160,25 @@ void bli_packm_blk_var1( obj_t* c,
|
|||||||
// real domain micro-kernels. (In the aforementioned situation,
|
// real domain micro-kernels. (In the aforementioned situation,
|
||||||
// applying a real scalar is easy, but applying a complex one is
|
// applying a real scalar is easy, but applying a complex one is
|
||||||
// harder, so we avoid the need altogether with the code below.)
|
// harder, so we avoid the need altogether with the code below.)
|
||||||
if( bli_thread_am_ochief( t ) )
|
if ( bli_obj_scalar_has_nonzero_imag( p ) )
|
||||||
{
|
{
|
||||||
if ( bli_obj_scalar_has_nonzero_imag( p ) )
|
//printf( "applying non-zero imag kappa\n" );
|
||||||
{
|
|
||||||
//printf( "applying non-zero imag kappa\n" );
|
// Detach the scalar.
|
||||||
// Detach the scalar.
|
bli_obj_scalar_detach( p, &kappa );
|
||||||
bli_obj_scalar_detach( p, &kappa );
|
|
||||||
|
// Reset the attached scalar (to 1.0).
|
||||||
// Reset the attached scalar (to 1.0).
|
bli_obj_scalar_reset( p );
|
||||||
bli_obj_scalar_reset( p );
|
|
||||||
|
kappa_p = κ
|
||||||
kappa_p = κ
|
}
|
||||||
}
|
else
|
||||||
else
|
{
|
||||||
{
|
// If the internal scalar of A has only a real component, then
|
||||||
// If the internal scalar of A has only a real component, then
|
// we will apply it later (in the micro-kernel), and so we will
|
||||||
// we will apply it later (in the micro-kernel), and so we will
|
// use BLIS_ONE to indicate no scaling during packing.
|
||||||
// use BLIS_ONE to indicate no scaling during packing.
|
kappa_p = &BLIS_ONE;
|
||||||
kappa_p = &BLIS_ONE;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
kappa_p = bli_thread_obroadcast( t, kappa_p );
|
|
||||||
|
|
||||||
// Acquire the buffer to the kappa chosen above.
|
// Acquire the buffer to the kappa chosen above.
|
||||||
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
|
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
|
||||||
@@ -194,7 +195,12 @@ void bli_packm_blk_var1( obj_t* c,
|
|||||||
bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers;
|
bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers;
|
||||||
else packm_kers = packm_struc_cxk_kers;
|
else packm_kers = packm_struc_cxk_kers;
|
||||||
#else
|
#else
|
||||||
func_t* cntx_packm_kers = bli_cntx_get_packm_ukr( cntx );
|
// The original idea here was to read the packm_ukr from the context
|
||||||
|
// if it is non-NULL. The problem is, it requires that we be able to
|
||||||
|
// assume that the packm_ukr field is initialized to NULL, which it
|
||||||
|
// currently is not.
|
||||||
|
|
||||||
|
//func_t* cntx_packm_kers = bli_cntx_get_packm_ukr( cntx );
|
||||||
|
|
||||||
//if ( bli_func_is_null_dt( dt_cp, cntx_packm_kers ) )
|
//if ( bli_func_is_null_dt( dt_cp, cntx_packm_kers ) )
|
||||||
{
|
{
|
||||||
@@ -203,7 +209,6 @@ void bli_packm_blk_var1( obj_t* c,
|
|||||||
// we use the default lookup table to determine the right func_t
|
// we use the default lookup table to determine the right func_t
|
||||||
// for the current schema.
|
// for the current schema.
|
||||||
const dim_t i = bli_pack_schema_index( schema );
|
const dim_t i = bli_pack_schema_index( schema );
|
||||||
//printf( "bli_packm_blk_var1: pack schema index = %lu (schema = %x)\n", i, schema );
|
|
||||||
|
|
||||||
packm_kers = &packm_struc_cxk_kers[ i ];
|
packm_kers = &packm_struc_cxk_kers[ i ];
|
||||||
}
|
}
|
||||||
@@ -221,11 +226,6 @@ void bli_packm_blk_var1( obj_t* c,
|
|||||||
// Query the datatype-specific function pointer from the func_t object.
|
// Query the datatype-specific function pointer from the func_t object.
|
||||||
packm_ker = bli_func_get_dt( dt_cp, packm_kers );
|
packm_ker = bli_func_get_dt( dt_cp, packm_kers );
|
||||||
|
|
||||||
|
|
||||||
//bli_cntx_print( cntx );
|
|
||||||
//printf( "bli_packm_blk_var1: packm_ker = %p\n", packm_ker );
|
|
||||||
//printf( "bli_packm_blk_var1: cntx_packm_ker = %p\n", cntx_packm_kers );
|
|
||||||
//printf( "bli_packm_blk_var1: local_table_entry = %p\n", &packm_struc_cxk_kers[ bli_pack_schema_index( schema ) ] );
|
|
||||||
// Index into the type combination array to extract the correct
|
// Index into the type combination array to extract the correct
|
||||||
// function pointer.
|
// function pointer.
|
||||||
f = ftypes[dt_cp];
|
f = ftypes[dt_cp];
|
||||||
@@ -598,6 +598,57 @@ PASTEMAC(ch,fprintm)( stdout, "packm_var2: a", m, n, \
|
|||||||
p_inc = ps_p; \
|
p_inc = ps_p; \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
|
/*
|
||||||
|
if ( col_stored ) { \
|
||||||
|
if ( bli_thread_work_id( thread ) == 0 ) \
|
||||||
|
{ \
|
||||||
|
printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
|
||||||
|
fflush( stdout ); \
|
||||||
|
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \
|
||||||
|
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
|
||||||
|
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \
|
||||||
|
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
|
||||||
|
fflush( stdout ); \
|
||||||
|
} \
|
||||||
|
bli_thread_obarrier( thread ); \
|
||||||
|
if ( bli_thread_work_id( thread ) == 1 ) \
|
||||||
|
{ \
|
||||||
|
printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
|
||||||
|
fflush( stdout ); \
|
||||||
|
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \
|
||||||
|
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
|
||||||
|
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \
|
||||||
|
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
|
||||||
|
fflush( stdout ); \
|
||||||
|
} \
|
||||||
|
bli_thread_obarrier( thread ); \
|
||||||
|
} \
|
||||||
|
else { \
|
||||||
|
if ( bli_thread_work_id( thread ) == 0 ) \
|
||||||
|
{ \
|
||||||
|
printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
|
||||||
|
fflush( stdout ); \
|
||||||
|
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \
|
||||||
|
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
|
||||||
|
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \
|
||||||
|
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
|
||||||
|
fflush( stdout ); \
|
||||||
|
} \
|
||||||
|
bli_thread_obarrier( thread ); \
|
||||||
|
if ( bli_thread_work_id( thread ) == 1 ) \
|
||||||
|
{ \
|
||||||
|
printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
|
||||||
|
fflush( stdout ); \
|
||||||
|
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \
|
||||||
|
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
|
||||||
|
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \
|
||||||
|
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
|
||||||
|
fflush( stdout ); \
|
||||||
|
} \
|
||||||
|
bli_thread_obarrier( thread ); \
|
||||||
|
} \
|
||||||
|
*/ \
|
||||||
|
\
|
||||||
/*
|
/*
|
||||||
if ( bli_is_4mi_packed( schema ) ) { \
|
if ( bli_is_4mi_packed( schema ) ) { \
|
||||||
printf( "packm_var2: is_p_use = %lu\n", is_p_use ); \
|
printf( "packm_var2: is_p_use = %lu\n", is_p_use ); \
|
||||||
|
|||||||
@@ -32,10 +32,14 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_packm_blk_var1( obj_t* c,
|
void bli_packm_blk_var1
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* c,
|
||||||
thrinfo_t* t );
|
obj_t* p,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* t
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
|
|||||||
@@ -35,9 +35,12 @@
|
|||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
|
|
||||||
void bli_packm_init_check( obj_t* a,
|
void bli_packm_init_check
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx )
|
obj_t* a,
|
||||||
|
obj_t* p,
|
||||||
|
cntx_t* cntx
|
||||||
|
)
|
||||||
{
|
{
|
||||||
err_t e_val;
|
err_t e_val;
|
||||||
|
|
||||||
@@ -54,9 +57,12 @@ void bli_packm_init_check( obj_t* a,
|
|||||||
//bli_check_error_code( e_val );
|
//bli_check_error_code( e_val );
|
||||||
}
|
}
|
||||||
|
|
||||||
void bli_packm_int_check( obj_t* a,
|
void bli_packm_int_check
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx )
|
obj_t* a,
|
||||||
|
obj_t* p,
|
||||||
|
cntx_t* cntx
|
||||||
|
)
|
||||||
{
|
{
|
||||||
err_t e_val;
|
err_t e_val;
|
||||||
|
|
||||||
|
|||||||
@@ -32,10 +32,17 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_packm_init_check( obj_t* a,
|
void bli_packm_init_check
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx );
|
obj_t* a,
|
||||||
|
obj_t* p,
|
||||||
|
cntx_t* cntx
|
||||||
|
);
|
||||||
|
|
||||||
|
void bli_packm_int_check
|
||||||
|
(
|
||||||
|
obj_t* a,
|
||||||
|
obj_t* p,
|
||||||
|
cntx_t* cntx
|
||||||
|
);
|
||||||
|
|
||||||
void bli_packm_int_check( obj_t* a,
|
|
||||||
obj_t* p,
|
|
||||||
cntx_t* cntx );
|
|
||||||
|
|||||||
@@ -34,109 +34,49 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
packm_t* packm_cntl_row = NULL;
|
cntl_t* bli_packm_cntl_obj_create
|
||||||
packm_t* packm_cntl_col = NULL;
|
(
|
||||||
|
void* var_func,
|
||||||
packm_t* packm_cntl = NULL;
|
void* packm_var_func,
|
||||||
|
bszid_t bmid_m,
|
||||||
void bli_packm_cntl_init()
|
bszid_t bmid_n,
|
||||||
|
bool_t does_invert_diag,
|
||||||
|
bool_t rev_iter_if_upper,
|
||||||
|
bool_t rev_iter_if_lower,
|
||||||
|
pack_t pack_schema,
|
||||||
|
packbuf_t pack_buf_type,
|
||||||
|
cntl_t* sub_node
|
||||||
|
)
|
||||||
{
|
{
|
||||||
// Generally speaking, the BLIS_PACKED_ROWS and BLIS_PACKED_COLUMNS
|
cntl_t* cntl;
|
||||||
// are used by the level-2 operations. These schemas amount to simple
|
packm_params_t* params;
|
||||||
// copies to row or column storage. These simple schemas may be used
|
|
||||||
// by level-3 operations, but they should never be used for matrices
|
|
||||||
// with structure (since they do not densify).
|
|
||||||
// The BLIS_PACKED_ROW_PANELS and BLIS_PACKED_COL_PANELS schemas are
|
|
||||||
// used only in level-3 operations. They pack to (typically) skinny
|
|
||||||
// row and column panels, where the width of the panel is determined
|
|
||||||
// by register blocksizes. It is assumed that matrices with structure
|
|
||||||
// will be densified.
|
|
||||||
|
|
||||||
// Create control trees to pack by rows.
|
// Allocate a packm_params_t struct.
|
||||||
packm_cntl_row
|
params = bli_malloc_intl( sizeof( packm_params_t ) );
|
||||||
=
|
|
||||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
|
||||||
BLIS_VARIANT1, // When packing to rows:
|
|
||||||
BLIS_VF, // used for m dimension
|
|
||||||
BLIS_VF, // used for n dimension
|
|
||||||
FALSE, // do NOT invert diagonal
|
|
||||||
FALSE, // do NOT iterate backwards if upper
|
|
||||||
FALSE, // do NOT iterate backwards if lower
|
|
||||||
BLIS_PACKED_ROWS,
|
|
||||||
BLIS_BUFFER_FOR_GEN_USE );
|
|
||||||
|
|
||||||
|
// Initialize the packm_params_t struct.
|
||||||
|
params->size = sizeof( packm_params_t );
|
||||||
|
params->var_func = packm_var_func;
|
||||||
|
params->bmid_m = bmid_m;
|
||||||
|
params->bmid_n = bmid_n;
|
||||||
|
params->does_invert_diag = does_invert_diag;
|
||||||
|
params->rev_iter_if_upper = rev_iter_if_upper;
|
||||||
|
params->rev_iter_if_lower = rev_iter_if_lower;
|
||||||
|
params->pack_schema = pack_schema;
|
||||||
|
params->pack_buf_type = pack_buf_type;
|
||||||
|
|
||||||
// Create control trees to pack by columns.
|
// It's important that we set the bszid field to BLIS_NO_PART to indicate
|
||||||
packm_cntl_col
|
// that no blocksize partitioning is performed. bli_cntl_free() will rely
|
||||||
=
|
// on this information to know how to step through the thrinfo_t tree in
|
||||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
// sync with the cntl_t tree.
|
||||||
BLIS_VARIANT1, // When packing to columns:
|
cntl = bli_cntl_obj_create
|
||||||
BLIS_VF, // used for m dimension
|
(
|
||||||
BLIS_VF, // used for n dimension
|
BLIS_NO_PART,
|
||||||
FALSE, // do NOT invert diagonal
|
var_func,
|
||||||
FALSE, // do NOT iterate backwards if upper
|
params,
|
||||||
FALSE, // do NOT iterate backwards if lower
|
sub_node
|
||||||
BLIS_PACKED_COLUMNS,
|
);
|
||||||
BLIS_BUFFER_FOR_GEN_USE );
|
|
||||||
|
|
||||||
|
|
||||||
// Set defaults when we don't care whether the packing is by rows or
|
|
||||||
// by columns.
|
|
||||||
packm_cntl = packm_cntl_col;
|
|
||||||
}
|
|
||||||
|
|
||||||
void bli_packm_cntl_finalize()
|
|
||||||
{
|
|
||||||
bli_cntl_obj_free( packm_cntl_row );
|
|
||||||
bli_cntl_obj_free( packm_cntl_col );
|
|
||||||
}
|
|
||||||
|
|
||||||
packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
|
|
||||||
varnum_t var_num,
|
|
||||||
bszid_t bmid_m,
|
|
||||||
bszid_t bmid_n,
|
|
||||||
bool_t does_invert_diag,
|
|
||||||
bool_t rev_iter_if_upper,
|
|
||||||
bool_t rev_iter_if_lower,
|
|
||||||
pack_t pack_schema,
|
|
||||||
packbuf_t pack_buf_type )
|
|
||||||
{
|
|
||||||
packm_t* cntl;
|
|
||||||
|
|
||||||
cntl = ( packm_t* ) bli_malloc_intl( sizeof(packm_t) );
|
|
||||||
|
|
||||||
cntl->impl_type = impl_type;
|
|
||||||
cntl->var_num = var_num;
|
|
||||||
cntl->bmid_m = bmid_m;
|
|
||||||
cntl->bmid_n = bmid_n;
|
|
||||||
cntl->does_invert_diag = does_invert_diag;
|
|
||||||
cntl->rev_iter_if_upper = rev_iter_if_upper;
|
|
||||||
cntl->rev_iter_if_lower = rev_iter_if_lower;
|
|
||||||
cntl->pack_schema = pack_schema;
|
|
||||||
cntl->pack_buf_type = pack_buf_type;
|
|
||||||
|
|
||||||
return cntl;
|
return cntl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bli_packm_cntl_obj_init( packm_t* cntl,
|
|
||||||
impl_t impl_type,
|
|
||||||
varnum_t var_num,
|
|
||||||
bszid_t bmid_m,
|
|
||||||
bszid_t bmid_n,
|
|
||||||
bool_t does_invert_diag,
|
|
||||||
bool_t rev_iter_if_upper,
|
|
||||||
bool_t rev_iter_if_lower,
|
|
||||||
pack_t pack_schema,
|
|
||||||
packbuf_t pack_buf_type )
|
|
||||||
{
|
|
||||||
cntl->impl_type = impl_type;
|
|
||||||
cntl->var_num = var_num;
|
|
||||||
cntl->bmid_m = bmid_m;
|
|
||||||
cntl->bmid_n = bmid_n;
|
|
||||||
cntl->does_invert_diag = does_invert_diag;
|
|
||||||
cntl->rev_iter_if_upper = rev_iter_if_upper;
|
|
||||||
cntl->rev_iter_if_lower = rev_iter_if_lower;
|
|
||||||
cntl->pack_schema = pack_schema;
|
|
||||||
cntl->pack_buf_type = pack_buf_type;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|||||||
@@ -32,56 +32,65 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct packm_s
|
struct packm_params_s
|
||||||
{
|
{
|
||||||
impl_t impl_type;
|
uint64_t size; // size field must be present and come first.
|
||||||
varnum_t var_num;
|
packm_voft var_func;
|
||||||
bszid_t bmid_m;
|
bszid_t bmid_m;
|
||||||
bszid_t bmid_n;
|
bszid_t bmid_n;
|
||||||
bool_t does_invert_diag;
|
bool_t does_invert_diag;
|
||||||
bool_t rev_iter_if_upper;
|
bool_t rev_iter_if_upper;
|
||||||
bool_t rev_iter_if_lower;
|
bool_t rev_iter_if_lower;
|
||||||
pack_t pack_schema;
|
pack_t pack_schema;
|
||||||
packbuf_t pack_buf_type;
|
packbuf_t pack_buf_type;
|
||||||
};
|
};
|
||||||
typedef struct packm_s packm_t;
|
typedef struct packm_params_s packm_params_t;
|
||||||
|
|
||||||
#define cntl_bmid_m( cntl ) cntl->bmid_m
|
#define bli_cntl_packm_params_var_func( cntl ) \
|
||||||
#define cntl_bmid_n( cntl ) cntl->bmid_n
|
\
|
||||||
|
( ( (packm_params_t*)(cntl)->params )->var_func )
|
||||||
|
|
||||||
#define cntl_does_invert_diag( cntl ) cntl->does_invert_diag
|
#define bli_cntl_packm_params_bmid_m( cntl ) \
|
||||||
#define cntl_rev_iter_if_upper( cntl ) cntl->rev_iter_if_upper
|
\
|
||||||
#define cntl_rev_iter_if_lower( cntl ) cntl->rev_iter_if_lower
|
( ( (packm_params_t*)(cntl)->params )->bmid_m )
|
||||||
#define cntl_pack_schema( cntl ) cntl->pack_schema
|
|
||||||
#define cntl_pack_buf_type( cntl ) cntl->pack_buf_type
|
|
||||||
|
|
||||||
#define bli_cntl_sub_packm( cntl ) cntl->sub_packm
|
#define bli_cntl_packm_params_bmid_n( cntl ) \
|
||||||
#define bli_cntl_sub_packm_a( cntl ) cntl->sub_packm_a
|
\
|
||||||
#define bli_cntl_sub_packm_a11( cntl ) cntl->sub_packm_a11
|
( ( (packm_params_t*)(cntl)->params )->bmid_n )
|
||||||
#define bli_cntl_sub_packm_b( cntl ) cntl->sub_packm_b
|
|
||||||
#define bli_cntl_sub_packm_b11( cntl ) cntl->sub_packm_b11
|
|
||||||
#define bli_cntl_sub_packm_c( cntl ) cntl->sub_packm_c
|
|
||||||
#define bli_cntl_sub_packm_c11( cntl ) cntl->sub_packm_c11
|
|
||||||
|
|
||||||
void bli_packm_cntl_init( void );
|
#define bli_cntl_packm_params_does_invert_diag( cntl ) \
|
||||||
void bli_packm_cntl_finalize( void );
|
\
|
||||||
packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
|
( ( (packm_params_t*)(cntl)->params )->does_invert_diag )
|
||||||
varnum_t var_num,
|
|
||||||
bszid_t bmid_m,
|
#define bli_cntl_packm_params_rev_iter_if_upper( cntl ) \
|
||||||
bszid_t bmid_n,
|
\
|
||||||
bool_t does_invert_diag,
|
( ( (packm_params_t*)(cntl)->params )->rev_iter_if_upper )
|
||||||
bool_t rev_iter_if_upper,
|
|
||||||
bool_t rev_iter_if_lower,
|
#define bli_cntl_packm_params_rev_iter_if_lower( cntl ) \
|
||||||
pack_t pack_schema,
|
\
|
||||||
packbuf_t pack_buf_type );
|
( ( (packm_params_t*)(cntl)->params )->rev_iter_if_lower )
|
||||||
void bli_packm_cntl_obj_init( packm_t* cntl,
|
|
||||||
impl_t impl_type,
|
#define bli_cntl_packm_params_pack_schema( cntl ) \
|
||||||
varnum_t var_num,
|
\
|
||||||
bszid_t bmid_m,
|
( ( (packm_params_t*)(cntl)->params )->pack_schema )
|
||||||
bszid_t bmid_n,
|
|
||||||
bool_t does_invert_diag,
|
#define bli_cntl_packm_params_pack_buf_type( cntl ) \
|
||||||
bool_t rev_iter_if_upper,
|
\
|
||||||
bool_t rev_iter_if_lower,
|
( ( (packm_params_t*)(cntl)->params )->pack_buf_type )
|
||||||
pack_t pack_schema,
|
|
||||||
packbuf_t pack_buf_type );
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
cntl_t* bli_packm_cntl_obj_create
|
||||||
|
(
|
||||||
|
void* var_func,
|
||||||
|
void* packm_var_func,
|
||||||
|
bszid_t bmid_m,
|
||||||
|
bszid_t bmid_n,
|
||||||
|
bool_t does_invert_diag,
|
||||||
|
bool_t rev_iter_if_upper,
|
||||||
|
bool_t rev_iter_if_lower,
|
||||||
|
pack_t pack_schema,
|
||||||
|
packbuf_t pack_buf_type,
|
||||||
|
cntl_t* sub_node
|
||||||
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ void bli_packm_cntx_init( cntx_t* cntx )
|
|||||||
bli_gks_cntx_set_l1v_ker( BLIS_SETV_KER, cntx );
|
bli_gks_cntx_set_l1v_ker( BLIS_SETV_KER, cntx );
|
||||||
|
|
||||||
// Initialize the context with the global membrk object.
|
// Initialize the context with the global membrk object.
|
||||||
bli_cntx_set_membrk( bli_mem_global_membrk(), cntx );
|
bli_cntx_set_membrk( bli_memsys_global_membrk(), cntx );
|
||||||
}
|
}
|
||||||
|
|
||||||
void bli_packm_cntx_finalize( cntx_t* cntx )
|
void bli_packm_cntx_finalize( cntx_t* cntx )
|
||||||
|
|||||||
@@ -35,38 +35,43 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
void bli_packm_init( obj_t* a,
|
siz_t bli_packm_init
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* a,
|
||||||
packm_t* cntl )
|
obj_t* p,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl
|
||||||
|
)
|
||||||
{
|
{
|
||||||
// The purpose of packm_init() is to initialize an object P so that
|
// The purpose of packm_init() is to initialize an object P so that
|
||||||
// a source object A can be packed into P via one of the packm
|
// a source object A can be packed into P via one of the packm
|
||||||
// implementations. This initialization includes acquiring a suitable
|
// implementations. This initialization precedes the acquisition of a
|
||||||
// block of memory from the memory allocator, if such a block of memory
|
// suitable block of memory from the memory allocator (if such a block
|
||||||
// has not already been allocated previously.
|
// of memory has not already been allocated previously).
|
||||||
|
|
||||||
invdiag_t invert_diag;
|
|
||||||
pack_t schema;
|
|
||||||
packord_t pack_ord_if_up;
|
|
||||||
packord_t pack_ord_if_lo;
|
|
||||||
packbuf_t pack_buf_type;
|
|
||||||
bszid_t bmult_id_m;
|
bszid_t bmult_id_m;
|
||||||
bszid_t bmult_id_n;
|
bszid_t bmult_id_n;
|
||||||
obj_t c;
|
bool_t does_invert_diag;
|
||||||
|
bool_t rev_iter_if_upper;
|
||||||
|
bool_t rev_iter_if_lower;
|
||||||
|
//pack_t pack_schema;
|
||||||
|
packbuf_t pack_buf_type;
|
||||||
|
siz_t size_needed;
|
||||||
|
|
||||||
// Check parameters.
|
// Check parameters.
|
||||||
if ( bli_error_checking_is_enabled() )
|
if ( bli_error_checking_is_enabled() )
|
||||||
bli_packm_init_check( a, p, cntx );
|
bli_packm_init_check( a, p, cntx );
|
||||||
|
|
||||||
// First check if we are to skip this operation because the control tree
|
// Extract various fields from the control tree.
|
||||||
// is NULL, and if so, simply alias the object to its packed counterpart.
|
bmult_id_m = bli_cntl_packm_params_bmid_m( cntl );
|
||||||
if ( bli_cntl_is_noop( cntl ) )
|
bmult_id_n = bli_cntl_packm_params_bmid_n( cntl );
|
||||||
{
|
does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl );
|
||||||
bli_obj_alias_to( *a, *p );
|
rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl );
|
||||||
return;
|
rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl );
|
||||||
}
|
//pack_schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||||
|
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
|
||||||
|
|
||||||
|
#if 0
|
||||||
// Let us now check to see if the object has already been packed. First
|
// Let us now check to see if the object has already been packed. First
|
||||||
// we check if it has been packed to an unspecified (row or column)
|
// we check if it has been packed to an unspecified (row or column)
|
||||||
// format, in which case we can alias the object and return.
|
// format, in which case we can alias the object and return.
|
||||||
@@ -79,179 +84,150 @@ void bli_packm_init( obj_t* a,
|
|||||||
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
|
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
|
||||||
{
|
{
|
||||||
bli_obj_alias_to( *a, *p );
|
bli_obj_alias_to( *a, *p );
|
||||||
return;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// At this point, we can be assured that cntl is not NULL. Now we check
|
// Now we check if the object has already been packed to the desired
|
||||||
// if the object has already been packed to the desired schema (as en-
|
// schema (as encoded in the control tree). If so, we can alias and
|
||||||
// coded in the control tree). If so, we can alias and return, as above.
|
// return 0.
|
||||||
// NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED
|
// NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED
|
||||||
// and thus packing will be called for (but in some cases packing has
|
// and thus packing will be called for (but in some cases packing has
|
||||||
// already taken place, or does not need to take place, and so that will
|
// already taken place, or does not need to take place, and so that will
|
||||||
// be indicated by the pack status). Also, not all combinations of
|
// be indicated by the pack status). Also, not all combinations of
|
||||||
// current pack status and desired pack schema are valid.
|
// current pack status and desired pack schema are valid.
|
||||||
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
|
if ( bli_obj_pack_schema( *a ) == pack_schema )
|
||||||
{
|
{
|
||||||
bli_obj_alias_to( *a, *p );
|
bli_obj_alias_to( *a, *p );
|
||||||
return;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// If the object is marked as being filled with zeros, then we can skip
|
// If the object is marked as being filled with zeros, then we can skip
|
||||||
// the packm operation entirely and alias. Notice that we use pack-aware
|
// the packm operation entirely and alias.
|
||||||
// aliasing. This is needed because the object may have been packed in
|
|
||||||
// a previous iteration, which means the object currently contains the
|
|
||||||
// mem_t entry of an already-allocated block. bli_obj_alias_for_packing()
|
|
||||||
// will avoid overwriting that mem_t entry, which means it can be
|
|
||||||
// properly released later on.
|
|
||||||
if ( bli_obj_is_zeros( *a ) )
|
if ( bli_obj_is_zeros( *a ) )
|
||||||
{
|
{
|
||||||
bli_obj_alias_for_packing( *a, *p );
|
bli_obj_alias_to( *a, *p );
|
||||||
return;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now, if we are not skipping the pack operation, then the only question
|
// We now ignore the pack_schema field in the control tree and
|
||||||
// left is whether we are to typecast matrix a before packing.
|
// extract the schema from the context, depending on whether we are
|
||||||
if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
|
|
||||||
bli_abort();
|
|
||||||
/*
|
|
||||||
{
|
|
||||||
// Initialize an object c for the intermediate typecast matrix.
|
|
||||||
bli_packm_init_cast( a,
|
|
||||||
p,
|
|
||||||
&c );
|
|
||||||
|
|
||||||
// Copy/typecast matrix a to matrix c.
|
|
||||||
bli_copym( a,
|
|
||||||
&c );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
// If no cast is needed, then aliasing object c to the original
|
|
||||||
// matrix serves as a minor optimization. This causes the packm
|
|
||||||
// implementation to pack directly from matrix a.
|
|
||||||
bli_obj_alias_to( *a, c );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Extract various fields from the control tree.
|
|
||||||
pack_buf_type = cntl_pack_buf_type( cntl );
|
|
||||||
bmult_id_m = cntl_bmid_m( cntl );
|
|
||||||
bmult_id_n = cntl_bmid_n( cntl );
|
|
||||||
|
|
||||||
// Extract the schema from the context, depending on whether we are
|
|
||||||
// preparing to pack a block of A or panel of B. For A and B, we must
|
// preparing to pack a block of A or panel of B. For A and B, we must
|
||||||
// obtain the schema from the context since the induced methods reuse
|
// obtain the schema from the context since the induced methods reuse
|
||||||
// the same control trees used by native execution, and those induced
|
// the same control trees used by native execution, and those induced
|
||||||
// methods specify the schema used by the current execution phase
|
// methods specify the schema used by the current execution phase
|
||||||
// within the context (whereas the control tree does not change).
|
// within the context (whereas the control tree does not change).
|
||||||
|
pack_t schema;
|
||||||
|
|
||||||
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
|
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
|
||||||
{
|
{
|
||||||
schema = bli_cntx_get_pack_schema_a( cntx );
|
schema = bli_cntx_get_pack_schema_a( cntx );
|
||||||
//printf( "bli_packm_init: pack schema a = %x\n", schema );
|
|
||||||
}
|
}
|
||||||
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
|
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
|
||||||
{
|
{
|
||||||
schema = bli_cntx_get_pack_schema_b( cntx );
|
schema = bli_cntx_get_pack_schema_b( cntx );
|
||||||
//printf( "bli_packm_init: pack schema b = %x\n", schema );
|
|
||||||
}
|
}
|
||||||
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
|
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
|
||||||
{
|
{
|
||||||
// If we get a request to pack C for some reason, it is likely
|
// If we get a request to pack C for some reason, it is likely
|
||||||
// not part of an induced method, and so it would be safe (and
|
// not part of an induced method, and so it would be safe (and
|
||||||
// necessary) to read the pack schema from the control tree.
|
// necessary) to read the pack schema from the control tree.
|
||||||
schema = cntl_pack_schema( cntl );
|
schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||||
//printf( "bli_packm_init: pack schema c = %x\n", schema );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare a few other variables based on properties of the control
|
// Prepare a few other variables based on properties of the control
|
||||||
// tree.
|
// tree.
|
||||||
|
|
||||||
if ( cntl_does_invert_diag( cntl ) ) invert_diag = BLIS_INVERT_DIAG;
|
invdiag_t invert_diag;
|
||||||
else invert_diag = BLIS_NO_INVERT_DIAG;
|
packord_t pack_ord_if_up;
|
||||||
|
packord_t pack_ord_if_lo;
|
||||||
|
|
||||||
if ( cntl_rev_iter_if_upper( cntl ) ) pack_ord_if_up = BLIS_PACK_REV_IF_UPPER;
|
if ( does_invert_diag ) invert_diag = BLIS_INVERT_DIAG;
|
||||||
else pack_ord_if_up = BLIS_PACK_FWD_IF_UPPER;
|
else invert_diag = BLIS_NO_INVERT_DIAG;
|
||||||
|
|
||||||
if ( cntl_rev_iter_if_lower( cntl ) ) pack_ord_if_lo = BLIS_PACK_REV_IF_LOWER;
|
if ( rev_iter_if_upper ) pack_ord_if_up = BLIS_PACK_REV_IF_UPPER;
|
||||||
else pack_ord_if_lo = BLIS_PACK_FWD_IF_LOWER;
|
else pack_ord_if_up = BLIS_PACK_FWD_IF_UPPER;
|
||||||
|
|
||||||
|
if ( rev_iter_if_lower ) pack_ord_if_lo = BLIS_PACK_REV_IF_LOWER;
|
||||||
|
else pack_ord_if_lo = BLIS_PACK_FWD_IF_LOWER;
|
||||||
|
|
||||||
// Initialize object p for the final packed matrix.
|
// Initialize object p for the final packed matrix.
|
||||||
bli_packm_init_pack( invert_diag,
|
size_needed
|
||||||
schema,
|
=
|
||||||
pack_ord_if_up,
|
bli_packm_init_pack
|
||||||
pack_ord_if_lo,
|
(
|
||||||
pack_buf_type,
|
invert_diag,
|
||||||
bmult_id_m,
|
schema,
|
||||||
bmult_id_n,
|
pack_ord_if_up,
|
||||||
&c,
|
pack_ord_if_lo,
|
||||||
p,
|
bmult_id_m,
|
||||||
cntx );
|
bmult_id_n,
|
||||||
|
a,
|
||||||
|
p,
|
||||||
|
cntx
|
||||||
|
);
|
||||||
|
|
||||||
// Now p is ready to be packed.
|
// Return the size needed for memory allocation of the packed buffer.
|
||||||
|
return size_needed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void bli_packm_init_pack( invdiag_t invert_diag,
|
siz_t bli_packm_init_pack
|
||||||
pack_t schema,
|
(
|
||||||
packord_t pack_ord_if_up,
|
invdiag_t invert_diag,
|
||||||
packord_t pack_ord_if_lo,
|
pack_t schema,
|
||||||
packbuf_t pack_buf_type,
|
packord_t pack_ord_if_up,
|
||||||
bszid_t bmult_id_m,
|
packord_t pack_ord_if_lo,
|
||||||
bszid_t bmult_id_n,
|
bszid_t bmult_id_m,
|
||||||
obj_t* c,
|
bszid_t bmult_id_n,
|
||||||
obj_t* p,
|
obj_t* a,
|
||||||
cntx_t* cntx )
|
obj_t* p,
|
||||||
|
cntx_t* cntx
|
||||||
|
)
|
||||||
{
|
{
|
||||||
num_t dt = bli_obj_datatype( *c );
|
num_t dt = bli_obj_datatype( *a );
|
||||||
trans_t transc = bli_obj_onlytrans_status( *c );
|
trans_t transa = bli_obj_onlytrans_status( *a );
|
||||||
dim_t m_c = bli_obj_length( *c );
|
dim_t m_a = bli_obj_length( *a );
|
||||||
dim_t n_c = bli_obj_width( *c );
|
dim_t n_a = bli_obj_width( *a );
|
||||||
dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx );
|
dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx );
|
||||||
dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx );
|
dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx );
|
||||||
dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx );
|
dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx );
|
||||||
dim_t bmult_n_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_n, cntx );
|
dim_t bmult_n_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_n, cntx );
|
||||||
|
|
||||||
membrk_t* membrk = bli_cntx_get_membrk( cntx );
|
|
||||||
|
|
||||||
mem_t* mem_p;
|
|
||||||
dim_t m_p, n_p;
|
dim_t m_p, n_p;
|
||||||
dim_t m_p_pad, n_p_pad;
|
dim_t m_p_pad, n_p_pad;
|
||||||
siz_t size_p;
|
siz_t size_p;
|
||||||
siz_t elem_size_p;
|
siz_t elem_size_p;
|
||||||
inc_t rs_p, cs_p;
|
inc_t rs_p, cs_p;
|
||||||
inc_t is_p;
|
inc_t is_p;
|
||||||
void* buf;
|
|
||||||
|
|
||||||
|
|
||||||
// We begin by copying the basic fields of c. We do NOT copy the
|
// We begin by copying the fields of A.
|
||||||
// pack_mem entry from c because the entry in p may be cached from
|
bli_obj_alias_to( *a, *p );
|
||||||
// a previous iteration, and thus we don't want to overwrite it.
|
|
||||||
bli_obj_alias_for_packing( *c, *p );
|
|
||||||
|
|
||||||
// Update the dimension fields to explicitly reflect a transposition,
|
// Update the dimension fields to explicitly reflect a transposition,
|
||||||
// if needed.
|
// if needed.
|
||||||
// Then, clear the conjugation and transposition fields from the object
|
// Then, clear the conjugation and transposition fields from the object
|
||||||
// since matrix packing in BLIS is deemed to take care of all conjugation
|
// since matrix packing in BLIS is deemed to take care of all conjugation
|
||||||
// and transposition necessary.
|
// and transposition necessary.
|
||||||
// Then, we adjust the properties of p when c needs a transposition.
|
// Then, we adjust the properties of P when A needs a transposition.
|
||||||
// We negate the diagonal offset, and if c is upper- or lower-stored,
|
// We negate the diagonal offset, and if A is upper- or lower-stored,
|
||||||
// we either toggle the uplo of p.
|
// we either toggle the uplo of P.
|
||||||
// Finally, if we mark p as dense since we assume that all matrices,
|
// Finally, if we mark P as dense since we assume that all matrices,
|
||||||
// regardless of structure, will be densified.
|
// regardless of structure, will be densified.
|
||||||
bli_obj_set_dims_with_trans( transc, m_c, n_c, *p );
|
bli_obj_set_dims_with_trans( transa, m_a, n_a, *p );
|
||||||
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p );
|
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p );
|
||||||
if ( bli_does_trans( transc ) )
|
if ( bli_does_trans( transa ) )
|
||||||
{
|
{
|
||||||
bli_obj_negate_diag_offset( *p );
|
bli_obj_negate_diag_offset( *p );
|
||||||
if ( bli_obj_is_upper_or_lower( *c ) )
|
if ( bli_obj_is_upper_or_lower( *a ) )
|
||||||
bli_obj_toggle_uplo( *p );
|
bli_obj_toggle_uplo( *p );
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are packing micro-panels, mark p as dense. Otherwise, we are
|
// If we are packing micro-panels, mark P as dense. Otherwise, we are
|
||||||
// probably being called in the context of a level-2 operation, in
|
// probably being called in the context of a level-2 operation, in
|
||||||
// which case we do not want to overwrite the uplo field of p (inherited
|
// which case we do not want to overwrite the uplo field of P (inherited
|
||||||
// from c) with BLIS_DENSE because that information may be needed by
|
// from A) with BLIS_DENSE because that information may be needed by
|
||||||
// the level-2 operation's unblocked variant to decide whether to
|
// the level-2 operation's unblocked variant to decide whether to
|
||||||
// execute a "lower" or "upper" branch of code.
|
// execute a "lower" or "upper" branch of code.
|
||||||
if ( bli_is_panel_packed( schema ) )
|
if ( bli_is_panel_packed( schema ) )
|
||||||
@@ -265,7 +241,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
|||||||
// Set the invert diagonal field.
|
// Set the invert diagonal field.
|
||||||
bli_obj_set_invert_diag( invert_diag, *p );
|
bli_obj_set_invert_diag( invert_diag, *p );
|
||||||
|
|
||||||
// Set the pack status of p to the pack schema prescribed in the control
|
// Set the pack status of P to the pack schema prescribed in the control
|
||||||
// tree node.
|
// tree node.
|
||||||
bli_obj_set_pack_schema( schema, *p );
|
bli_obj_set_pack_schema( schema, *p );
|
||||||
|
|
||||||
@@ -273,15 +249,11 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
|||||||
bli_obj_set_pack_order_if_upper( pack_ord_if_up, *p );
|
bli_obj_set_pack_order_if_upper( pack_ord_if_up, *p );
|
||||||
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, *p );
|
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, *p );
|
||||||
|
|
||||||
// Extract the address of the mem_t object within p that will track
|
|
||||||
// properties of the packed buffer.
|
|
||||||
mem_p = bli_obj_pack_mem( *p );
|
|
||||||
|
|
||||||
// Compute the dimensions padded by the dimension multiples. These
|
// Compute the dimensions padded by the dimension multiples. These
|
||||||
// dimensions will be the dimensions of the packed matrices, including
|
// dimensions will be the dimensions of the packed matrices, including
|
||||||
// zero-padding, and will be used by the macro- and micro-kernels.
|
// zero-padding, and will be used by the macro- and micro-kernels.
|
||||||
// We compute them by starting with the effective dimensions of c (now
|
// We compute them by starting with the effective dimensions of A (now
|
||||||
// in p) and aligning them to the dimension multiples (typically equal
|
// in P) and aligning them to the dimension multiples (typically equal
|
||||||
// to register blocksizes). This does waste a little bit of space for
|
// to register blocksizes). This does waste a little bit of space for
|
||||||
// level-2 operations, but that's okay with us.
|
// level-2 operations, but that's okay with us.
|
||||||
m_p = bli_obj_length( *p );
|
m_p = bli_obj_length( *p );
|
||||||
@@ -295,9 +267,9 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
|||||||
bli_obj_set_padded_dims( m_p_pad, n_p_pad, *p );
|
bli_obj_set_padded_dims( m_p_pad, n_p_pad, *p );
|
||||||
|
|
||||||
// Now we prepare to compute strides, align them, and compute the
|
// Now we prepare to compute strides, align them, and compute the
|
||||||
// total number of bytes needed for the packed buffer. After that,
|
// total number of bytes needed for the packed buffer. The caller
|
||||||
// we will acquire an appropriate block of memory from the memory
|
// will then use that value to acquire an appropriate block of memory
|
||||||
// allocator.
|
// from the memory allocator.
|
||||||
|
|
||||||
// Extract the element size for the packed object.
|
// Extract the element size for the packed object.
|
||||||
elem_size_p = bli_obj_elem_size( *p );
|
elem_size_p = bli_obj_elem_size( *p );
|
||||||
@@ -320,7 +292,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
|||||||
rs_p = bli_align_dim_to_size( rs_p, elem_size_p,
|
rs_p = bli_align_dim_to_size( rs_p, elem_size_p,
|
||||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
||||||
|
|
||||||
// Store the strides in p.
|
// Store the strides in P.
|
||||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||||
|
|
||||||
// Compute the size of the packed buffer.
|
// Compute the size of the packed buffer.
|
||||||
@@ -343,7 +315,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
|||||||
cs_p = bli_align_dim_to_size( cs_p, elem_size_p,
|
cs_p = bli_align_dim_to_size( cs_p, elem_size_p,
|
||||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
||||||
|
|
||||||
// Store the strides in p.
|
// Store the strides in P.
|
||||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||||
|
|
||||||
// Compute the size of the packed buffer.
|
// Compute the size of the packed buffer.
|
||||||
@@ -431,7 +403,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
|||||||
else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( m_p_pad / m_panel );
|
else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( m_p_pad / m_panel );
|
||||||
else is_p = 1;
|
else is_p = 1;
|
||||||
|
|
||||||
// Store the strides and panel dimension in p.
|
// Store the strides and panel dimension in P.
|
||||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||||
bli_obj_set_imag_stride( is_p, *p );
|
bli_obj_set_imag_stride( is_p, *p );
|
||||||
bli_obj_set_panel_dim( m_panel, *p );
|
bli_obj_set_panel_dim( m_panel, *p );
|
||||||
@@ -524,7 +496,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
|||||||
else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( n_p_pad / n_panel );
|
else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( n_p_pad / n_panel );
|
||||||
else is_p = 1;
|
else is_p = 1;
|
||||||
|
|
||||||
// Store the strides and panel dimension in p.
|
// Store the strides and panel dimension in P.
|
||||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||||
bli_obj_set_imag_stride( is_p, *p );
|
bli_obj_set_imag_stride( is_p, *p );
|
||||||
bli_obj_set_panel_dim( n_panel, *p );
|
bli_obj_set_panel_dim( n_panel, *p );
|
||||||
@@ -547,99 +519,6 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
|||||||
size_p = 0;
|
size_p = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return size_p;
|
||||||
if ( bli_mem_is_unalloc( mem_p ) )
|
|
||||||
{
|
|
||||||
// If the mem_t object of p has not yet been allocated, then acquire
|
|
||||||
// a memory block of type pack_buf_type.
|
|
||||||
bli_membrk_acquire_m( membrk,
|
|
||||||
size_p,
|
|
||||||
pack_buf_type,
|
|
||||||
mem_p );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// If the mem_t object is currently allocated and smaller than is
|
|
||||||
// needed, then it must have been allocated for a different type
|
|
||||||
// of object (a different pack_buf_type value), so we must first
|
|
||||||
// release it and then re-acquire it using the new size and new
|
|
||||||
// pack_buf_type value.
|
|
||||||
if ( bli_mem_size( mem_p ) < size_p )
|
|
||||||
{
|
|
||||||
bli_membrk_release( mem_p );
|
|
||||||
bli_membrk_acquire_m( membrk,
|
|
||||||
size_p,
|
|
||||||
pack_buf_type,
|
|
||||||
mem_p );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Grab the buffer address from the mem_t object and copy it to the
|
|
||||||
// main object buffer field. (Sometimes this buffer address will be
|
|
||||||
// copied when the value is already up-to-date, because it persists
|
|
||||||
// in the main object buffer field across loop iterations.)
|
|
||||||
buf = bli_mem_buffer( mem_p );
|
|
||||||
bli_obj_set_buffer( buf, *p );
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void bli_packm_release( obj_t* p,
|
|
||||||
packm_t* cntl )
|
|
||||||
{
|
|
||||||
if ( !bli_cntl_is_noop( cntl ) )
|
|
||||||
bli_obj_release_pack( p );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
void bli_packm_init_cast( obj_t* a,
|
|
||||||
obj_t* p,
|
|
||||||
obj_t* c )
|
|
||||||
{
|
|
||||||
// The idea here is that we want to create an object c that is identical
|
|
||||||
// to object a, except that:
|
|
||||||
// (1) the storage datatype of c is equal to the target datatype of a,
|
|
||||||
// with the element size of c adjusted accordingly,
|
|
||||||
// (2) the view offset of c is reset to (0,0),
|
|
||||||
// (3) object c's main buffer is set to a new memory region acquired
|
|
||||||
// from the memory manager, or extracted from p if a mem entry is
|
|
||||||
// already available, (After acquring a mem entry from the memory
|
|
||||||
// manager, it is cached within p for quick access later on.)
|
|
||||||
// (4) object c is marked as being stored in a standard, contiguous
|
|
||||||
// format (ie: a column-major order).
|
|
||||||
// Any transposition encoded within object a will not be handled here,
|
|
||||||
// but rather will be handled in the packm implementation. That way,
|
|
||||||
// the only thing castm needs to do is cast.
|
|
||||||
|
|
||||||
num_t dt_targ_a = bli_obj_target_datatype( *a );
|
|
||||||
dim_t m_a = bli_obj_length( *a );
|
|
||||||
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
|
|
||||||
inc_t rs_c, cs_c;
|
|
||||||
|
|
||||||
// We begin by copying the basic fields of a.
|
|
||||||
bli_obj_alias_to( *a, *c );
|
|
||||||
|
|
||||||
// Update datatype and element size fields.
|
|
||||||
bli_obj_set_datatype( dt_targ_a, *c );
|
|
||||||
bli_obj_set_elem_size( elem_size_c, *c );
|
|
||||||
|
|
||||||
// Reset the view offsets to (0,0).
|
|
||||||
bli_obj_set_offs( 0, 0, *c );
|
|
||||||
|
|
||||||
// Check the mem_t entry of p associated with the cast buffer. If it is
|
|
||||||
// NULL, then acquire memory sufficient to hold the object data and cache
|
|
||||||
// it to p. (Otherwise, if it is non-NULL, then memory has already been
|
|
||||||
// acquired from the memory manager and cached.) We then set the main
|
|
||||||
// buffer of c to the cached address of the cast memory.
|
|
||||||
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
|
|
||||||
|
|
||||||
// Update the strides. We set the increments to reflect column-major order
|
|
||||||
// storage. We start the leading dimension out as m(a) and increment it if
|
|
||||||
// necessary so that the beginning of each column is aligned.
|
|
||||||
cs_c = bli_align_dim_to_size( m_a, elem_size_c,
|
|
||||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
|
||||||
rs_c = 1;
|
|
||||||
bli_obj_set_strides( rs_c, cs_c, *c );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|||||||
@@ -32,28 +32,24 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_packm_init( obj_t* a,
|
siz_t bli_packm_init
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* a,
|
||||||
packm_t* cntl );
|
obj_t* p,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl
|
||||||
|
);
|
||||||
|
|
||||||
void bli_packm_init_pack( invdiag_t invert_diag,
|
siz_t bli_packm_init_pack
|
||||||
pack_t pack_schema,
|
(
|
||||||
packord_t pack_ord_if_up,
|
invdiag_t invert_diag,
|
||||||
packord_t pack_ord_if_lo,
|
pack_t schema,
|
||||||
packbuf_t pack_buf_type,
|
packord_t pack_ord_if_up,
|
||||||
bszid_t mr_id,
|
packord_t pack_ord_if_lo,
|
||||||
bszid_t nr_id,
|
bszid_t bmult_id_m,
|
||||||
obj_t* c,
|
bszid_t bmult_id_n,
|
||||||
obj_t* p,
|
obj_t* a,
|
||||||
cntx_t* cntx );
|
obj_t* p,
|
||||||
|
cntx_t* cntx
|
||||||
/*
|
);
|
||||||
void bli_packm_init_cast( obj_t* a,
|
|
||||||
obj_t* p,
|
|
||||||
obj_t* c );
|
|
||||||
*/
|
|
||||||
|
|
||||||
void bli_packm_release( obj_t* p,
|
|
||||||
packm_t* cntl );
|
|
||||||
|
|
||||||
|
|||||||
@@ -34,33 +34,16 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
#define FUNCPTR_T packm_fp
|
void bli_packm_int
|
||||||
|
(
|
||||||
typedef void (*FUNCPTR_T)( obj_t* a,
|
obj_t* a,
|
||||||
obj_t* p,
|
obj_t* p,
|
||||||
cntx_t* cntx,
|
cntx_t* cntx,
|
||||||
thrinfo_t* t );
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
static FUNCPTR_T vars[6][3] =
|
)
|
||||||
{
|
{
|
||||||
// unblocked optimized unblocked blocked
|
packm_voft f;
|
||||||
{ bli_packm_unb_var1, NULL, bli_packm_blk_var1 },
|
|
||||||
{ NULL, NULL, NULL, },
|
|
||||||
{ NULL, NULL, NULL, },
|
|
||||||
{ NULL, NULL, NULL, },
|
|
||||||
{ NULL, NULL, NULL, },
|
|
||||||
{ NULL, NULL, NULL, },
|
|
||||||
};
|
|
||||||
|
|
||||||
void bli_packm_int( obj_t* a,
|
|
||||||
obj_t* p,
|
|
||||||
cntx_t* cntx,
|
|
||||||
packm_t* cntl,
|
|
||||||
thrinfo_t* thread )
|
|
||||||
{
|
|
||||||
varnum_t n;
|
|
||||||
impl_t i;
|
|
||||||
FUNCPTR_T f;
|
|
||||||
|
|
||||||
// Check parameters.
|
// Check parameters.
|
||||||
if ( bli_error_checking_is_enabled() )
|
if ( bli_error_checking_is_enabled() )
|
||||||
@@ -70,14 +53,6 @@ void bli_packm_int( obj_t* a,
|
|||||||
// it, then we should fold it into the next alias-and-early-exit block.
|
// it, then we should fold it into the next alias-and-early-exit block.
|
||||||
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
|
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
|
||||||
|
|
||||||
// First check if we are to skip this operation because the control tree
|
|
||||||
// is NULL. We return without taking any action because a was already
|
|
||||||
// aliased to p in packm_init().
|
|
||||||
if ( bli_cntl_is_noop( cntl ) )
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Let us now check to see if the object has already been packed. First
|
// Let us now check to see if the object has already been packed. First
|
||||||
// we check if it has been packed to an unspecified (row or column)
|
// we check if it has been packed to an unspecified (row or column)
|
||||||
// format, in which case we can return, since by now aliasing has already
|
// format, in which case we can return, since by now aliasing has already
|
||||||
@@ -101,7 +76,7 @@ void bli_packm_int( obj_t* a,
|
|||||||
// already taken place, or does not need to take place, and so that will
|
// already taken place, or does not need to take place, and so that will
|
||||||
// be indicated by the pack status). Also, not all combinations of
|
// be indicated by the pack status). Also, not all combinations of
|
||||||
// current pack status and desired pack schema are valid.
|
// current pack status and desired pack schema are valid.
|
||||||
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
|
if ( bli_obj_pack_schema( *a ) == bli_cntl_packm_params_pack_schema( cntl ) )
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -113,21 +88,17 @@ void bli_packm_int( obj_t* a,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract the function pointer from the current control tree node.
|
||||||
// Extract the variant number and implementation type.
|
f = bli_cntl_packm_params_var_func( cntl );
|
||||||
n = bli_cntl_var_num( cntl );
|
|
||||||
i = bli_cntl_impl_type( cntl );
|
|
||||||
|
|
||||||
// Index into the variant array to extract the correct function pointer.
|
|
||||||
f = vars[n][i];
|
|
||||||
|
|
||||||
// Invoke the variant with kappa_use.
|
// Invoke the variant with kappa_use.
|
||||||
f( a,
|
f
|
||||||
p,
|
(
|
||||||
cntx,
|
a,
|
||||||
thread );
|
p,
|
||||||
|
cntx,
|
||||||
// Barrier so that packing is done before computation
|
cntl,
|
||||||
bli_thread_obarrier( thread );
|
thread
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -32,9 +32,11 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_packm_int( obj_t* a,
|
void bli_packm_int
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* a,
|
||||||
packm_t* cntl,
|
obj_t* p,
|
||||||
thrinfo_t* thread );
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
|
);
|
||||||
|
|||||||
@@ -41,7 +41,8 @@ thrinfo_t* bli_packm_thrinfo_create
|
|||||||
thrcomm_t* icomm,
|
thrcomm_t* icomm,
|
||||||
dim_t icomm_id,
|
dim_t icomm_id,
|
||||||
dim_t n_way,
|
dim_t n_way,
|
||||||
dim_t work_id
|
dim_t work_id,
|
||||||
|
thrinfo_t* sub_node
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
thrinfo_t* thread = bli_malloc_intl( sizeof( thrinfo_t ) );
|
thrinfo_t* thread = bli_malloc_intl( sizeof( thrinfo_t ) );
|
||||||
@@ -53,9 +54,8 @@ thrinfo_t* bli_packm_thrinfo_create
|
|||||||
icomm, icomm_id,
|
icomm, icomm_id,
|
||||||
n_way,
|
n_way,
|
||||||
work_id,
|
work_id,
|
||||||
NULL,
|
FALSE,
|
||||||
NULL,
|
sub_node
|
||||||
NULL
|
|
||||||
);
|
);
|
||||||
|
|
||||||
return thread;
|
return thread;
|
||||||
@@ -69,7 +69,8 @@ void bli_packm_thrinfo_init
|
|||||||
thrcomm_t* icomm,
|
thrcomm_t* icomm,
|
||||||
dim_t icomm_id,
|
dim_t icomm_id,
|
||||||
dim_t n_way,
|
dim_t n_way,
|
||||||
dim_t work_id
|
dim_t work_id,
|
||||||
|
thrinfo_t* sub_node
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
bli_thrinfo_init
|
bli_thrinfo_init
|
||||||
@@ -78,9 +79,8 @@ void bli_packm_thrinfo_init
|
|||||||
ocomm, ocomm_id,
|
ocomm, ocomm_id,
|
||||||
icomm, icomm_id,
|
icomm, icomm_id,
|
||||||
n_way, work_id,
|
n_way, work_id,
|
||||||
NULL,
|
FALSE,
|
||||||
NULL,
|
sub_node
|
||||||
NULL
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,7 +95,8 @@ void bli_packm_thrinfo_init_single
|
|||||||
&BLIS_SINGLE_COMM, 0,
|
&BLIS_SINGLE_COMM, 0,
|
||||||
&BLIS_SINGLE_COMM, 0,
|
&BLIS_SINGLE_COMM, 0,
|
||||||
1,
|
1,
|
||||||
0
|
0,
|
||||||
|
NULL
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -49,7 +49,8 @@ thrinfo_t* bli_packm_thrinfo_create
|
|||||||
thrcomm_t* icomm,
|
thrcomm_t* icomm,
|
||||||
dim_t icomm_id,
|
dim_t icomm_id,
|
||||||
dim_t n_way,
|
dim_t n_way,
|
||||||
dim_t work_id
|
dim_t work_id,
|
||||||
|
thrinfo_t* sub_node
|
||||||
);
|
);
|
||||||
|
|
||||||
void bli_packm_thrinfo_init
|
void bli_packm_thrinfo_init
|
||||||
@@ -60,7 +61,8 @@ void bli_packm_thrinfo_init
|
|||||||
thrcomm_t* icomm,
|
thrcomm_t* icomm,
|
||||||
dim_t icomm_id,
|
dim_t icomm_id,
|
||||||
dim_t n_way,
|
dim_t n_way,
|
||||||
dim_t work_id
|
dim_t work_id,
|
||||||
|
thrinfo_t* sub_node
|
||||||
);
|
);
|
||||||
|
|
||||||
void bli_packm_thrinfo_init_single
|
void bli_packm_thrinfo_init_single
|
||||||
|
|||||||
@@ -55,10 +55,14 @@ typedef void (*FUNCPTR_T)(
|
|||||||
static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1);
|
static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1);
|
||||||
|
|
||||||
|
|
||||||
void bli_packm_unb_var1( obj_t* c,
|
void bli_packm_unb_var1
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* c,
|
||||||
thrinfo_t* thread )
|
obj_t* p,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
|
)
|
||||||
{
|
{
|
||||||
num_t dt_cp = bli_obj_datatype( *c );
|
num_t dt_cp = bli_obj_datatype( *c );
|
||||||
|
|
||||||
|
|||||||
@@ -32,10 +32,14 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_packm_unb_var1( obj_t* c,
|
void bli_packm_unb_var1
|
||||||
obj_t* p,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* c,
|
||||||
thrinfo_t* thread );
|
obj_t* p,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
|
|||||||
@@ -32,6 +32,5 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_trsm_cntx_init( void );
|
#include "bli_scalm_cntl.h"
|
||||||
void bli_trsm_cntx_finalize( void );
|
|
||||||
|
|
||||||
@@ -34,38 +34,25 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
scalm_t* scalm_cntl = NULL;
|
cntl_t* bli_scalm_cntl_obj_create
|
||||||
|
(
|
||||||
void bli_scalm_cntl_init()
|
void* var_func,
|
||||||
|
cntl_t* sub_node
|
||||||
|
)
|
||||||
{
|
{
|
||||||
scalm_cntl = bli_scalm_cntl_obj_create( BLIS_UNBLOCKED,
|
cntl_t* cntl;
|
||||||
BLIS_VARIANT1 );
|
|
||||||
}
|
|
||||||
|
|
||||||
void bli_scalm_cntl_finalize()
|
// It's important that we set the bszid field to BLIS_NO_PART to indicate
|
||||||
{
|
// that no blocksize partitioning is performed. bli_cntl_free() will rely
|
||||||
bli_cntl_obj_free( scalm_cntl );
|
// on this information to know how to step through the thrinfo_t tree in
|
||||||
}
|
// sync with the cntl_t tree.
|
||||||
|
cntl = bli_cntl_obj_create
|
||||||
|
(
|
||||||
scalm_t* bli_scalm_cntl_obj_create( impl_t impl_type,
|
BLIS_NO_PART,
|
||||||
varnum_t var_num )
|
var_func,
|
||||||
{
|
NULL,
|
||||||
scalm_t* cntl;
|
sub_node
|
||||||
|
);
|
||||||
cntl = ( scalm_t* ) bli_malloc_intl( sizeof(scalm_t) );
|
|
||||||
|
|
||||||
cntl->impl_type = impl_type;
|
|
||||||
cntl->var_num = var_num;
|
|
||||||
|
|
||||||
return cntl;
|
return cntl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bli_scalm_cntl_obj_init( scalm_t* cntl,
|
|
||||||
impl_t impl_type,
|
|
||||||
varnum_t var_num )
|
|
||||||
{
|
|
||||||
cntl->impl_type = impl_type;
|
|
||||||
cntl->var_num = var_num;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|||||||
@@ -32,20 +32,9 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct scalm_s
|
|
||||||
{
|
|
||||||
impl_t impl_type;
|
|
||||||
varnum_t var_num;
|
|
||||||
};
|
|
||||||
typedef struct scalm_s scalm_t;
|
|
||||||
|
|
||||||
#define bli_cntl_sub_scalm( cntl ) cntl->sub_scalm
|
|
||||||
|
|
||||||
void bli_scalm_cntl_init( void );
|
|
||||||
void bli_scalm_cntl_finalize( void );
|
|
||||||
scalm_t* bli_scalm_cntl_obj_create( impl_t impl_type,
|
|
||||||
varnum_t var_num );
|
|
||||||
void bli_scalm_cntl_obj_init( scalm_t* cntl,
|
|
||||||
impl_t impl_type,
|
|
||||||
varnum_t var_num );
|
|
||||||
|
|
||||||
|
cntl_t* bli_scalm_cntl_obj_create
|
||||||
|
(
|
||||||
|
void* var_func,
|
||||||
|
cntl_t* sub_node
|
||||||
|
);
|
||||||
|
|||||||
@@ -37,8 +37,7 @@
|
|||||||
#include "bli_unpackm_int.h"
|
#include "bli_unpackm_int.h"
|
||||||
|
|
||||||
#include "bli_unpackm_unb_var1.h"
|
#include "bli_unpackm_unb_var1.h"
|
||||||
//#include "bli_unpackm_blk_var1.h"
|
|
||||||
|
|
||||||
#include "bli_unpackm_blk_var2.h"
|
#include "bli_unpackm_blk_var1.h"
|
||||||
|
|
||||||
#include "bli_unpackm_cxk.h"
|
#include "bli_unpackm_cxk.h"
|
||||||
|
|||||||
@@ -52,13 +52,17 @@ typedef void (*FUNCPTR_T)(
|
|||||||
cntx_t* cntx
|
cntx_t* cntx
|
||||||
);
|
);
|
||||||
|
|
||||||
static FUNCPTR_T GENARRAY(ftypes,unpackm_blk_var2);
|
static FUNCPTR_T GENARRAY(ftypes,unpackm_blk_var1);
|
||||||
|
|
||||||
|
|
||||||
void bli_unpackm_blk_var2( obj_t* p,
|
void bli_unpackm_blk_var1
|
||||||
obj_t* c,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* p,
|
||||||
unpackm_t* cntl )
|
obj_t* c,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
|
)
|
||||||
{
|
{
|
||||||
num_t dt_cp = bli_obj_datatype( *c );
|
num_t dt_cp = bli_obj_datatype( *c );
|
||||||
|
|
||||||
@@ -266,5 +270,5 @@ void PASTEMAC(ch,varname) \
|
|||||||
\
|
\
|
||||||
}
|
}
|
||||||
|
|
||||||
INSERT_GENTFUNC_BASIC0( unpackm_blk_var2 )
|
INSERT_GENTFUNC_BASIC0( unpackm_blk_var1 )
|
||||||
|
|
||||||
@@ -32,14 +32,35 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define bli_thrinfo_sub_self( thread ) thread->sub_l3op
|
void bli_unpackm_blk_var1
|
||||||
#define bli_thrinfo_sub_opackm( thread ) thread->opackm
|
(
|
||||||
#define bli_thrinfo_sub_ipackm( thread ) thread->ipackm
|
obj_t* p,
|
||||||
|
obj_t* c,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
|
);
|
||||||
|
|
||||||
#define trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
|
||||||
#define trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
|
||||||
#define trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
|
||||||
#define trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
|
||||||
|
|
||||||
//thrinfo_t** bli_trmm_thrinfo_create_paths( bool_t jc_dependency );
|
#undef GENTPROT
|
||||||
|
#define GENTPROT( ctype, ch, varname ) \
|
||||||
|
\
|
||||||
|
void PASTEMAC(ch,varname) \
|
||||||
|
( \
|
||||||
|
struc_t strucc, \
|
||||||
|
doff_t diagoffc, \
|
||||||
|
diag_t diagc, \
|
||||||
|
uplo_t uploc, \
|
||||||
|
trans_t transc, \
|
||||||
|
dim_t m, \
|
||||||
|
dim_t n, \
|
||||||
|
dim_t m_panel, \
|
||||||
|
dim_t n_panel, \
|
||||||
|
void* p, inc_t rs_p, inc_t cs_p, \
|
||||||
|
dim_t pd_p, inc_t ps_p, \
|
||||||
|
void* c, inc_t rs_c, inc_t cs_c, \
|
||||||
|
cntx_t* cntx \
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT_GENTPROT_BASIC( unpackm_blk_var1 )
|
||||||
|
|
||||||
@@ -34,10 +34,12 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
void bli_unpackm_check( obj_t* p,
|
void bli_unpackm_int_check
|
||||||
obj_t* a,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* p,
|
||||||
unpackm_t* cntl )
|
obj_t* a,
|
||||||
|
cntx_t* cntx
|
||||||
|
)
|
||||||
{
|
{
|
||||||
err_t e_val;
|
err_t e_val;
|
||||||
|
|
||||||
|
|||||||
@@ -32,7 +32,10 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_unpackm_check( obj_t* p,
|
void bli_unpackm_int_check
|
||||||
obj_t* a,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* p,
|
||||||
unpackm_t* cntl );
|
obj_t* a,
|
||||||
|
cntx_t* cntx
|
||||||
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -34,42 +34,35 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
unpackm_t* unpackm_cntl = NULL;
|
cntl_t* bli_unpackm_cntl_obj_create
|
||||||
|
(
|
||||||
void bli_unpackm_cntl_init()
|
void* var_func,
|
||||||
|
void* unpackm_var_func,
|
||||||
|
cntl_t* sub_node
|
||||||
|
)
|
||||||
{
|
{
|
||||||
unpackm_cntl = bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
cntl_t* cntl;
|
||||||
BLIS_VARIANT1,
|
unpackm_params_t* params;
|
||||||
NULL ); // no blocksize needed
|
|
||||||
}
|
|
||||||
|
|
||||||
void bli_unpackm_cntl_finalize()
|
// Allocate an unpackm_params_t struct.
|
||||||
{
|
params = bli_malloc_intl( sizeof( unpackm_params_t ) );
|
||||||
bli_cntl_obj_free( unpackm_cntl );
|
|
||||||
}
|
|
||||||
|
|
||||||
unpackm_t* bli_unpackm_cntl_obj_create( impl_t impl_type,
|
// Initialize the unpackm_params_t struct.
|
||||||
varnum_t var_num,
|
params->size = sizeof( unpackm_params_t );
|
||||||
blksz_t* b )
|
params->var_func = unpackm_var_func;
|
||||||
{
|
|
||||||
unpackm_t* cntl;
|
|
||||||
|
|
||||||
cntl = ( unpackm_t* ) bli_malloc_intl( sizeof(unpackm_t) );
|
// It's important that we set the bszid field to BLIS_NO_PART to indicate
|
||||||
|
// that no blocksize partitioning is performed. bli_cntl_free() will rely
|
||||||
cntl->impl_type = impl_type;
|
// on this information to know how to step through the thrinfo_t tree in
|
||||||
cntl->var_num = var_num;
|
// sync with the cntl_t tree.
|
||||||
cntl->b = b;
|
cntl = bli_cntl_obj_create
|
||||||
|
(
|
||||||
|
BLIS_NO_PART,
|
||||||
|
var_func,
|
||||||
|
params,
|
||||||
|
sub_node
|
||||||
|
);
|
||||||
|
|
||||||
return cntl;
|
return cntl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bli_unpackm_cntl_obj_init( unpackm_t* cntl,
|
|
||||||
impl_t impl_type,
|
|
||||||
varnum_t var_num,
|
|
||||||
blksz_t* b )
|
|
||||||
{
|
|
||||||
cntl->impl_type = impl_type;
|
|
||||||
cntl->var_num = var_num;
|
|
||||||
cntl->b = b;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|||||||
@@ -32,28 +32,23 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct unpackm_s
|
struct unpackm_params_s
|
||||||
{
|
{
|
||||||
impl_t impl_type;
|
uint64_t size; // size field must be present and come first.
|
||||||
varnum_t var_num;
|
unpackm_voft var_func;
|
||||||
blksz_t* b;
|
|
||||||
};
|
};
|
||||||
typedef struct unpackm_s unpackm_t;
|
typedef struct unpackm_params_s unpackm_params_t;
|
||||||
|
|
||||||
#define bli_cntl_sub_unpackm( cntl ) cntl->sub_unpackm
|
#define bli_cntl_unpackm_params_var_func( cntl ) \
|
||||||
#define bli_cntl_sub_unpackm_a( cntl ) cntl->sub_unpackm_a
|
\
|
||||||
#define bli_cntl_sub_unpackm_a11( cntl ) cntl->sub_unpackm_a11
|
( ( (unpackm_params_t*)(cntl)->params )->var_func )
|
||||||
#define bli_cntl_sub_unpackm_b( cntl ) cntl->sub_unpackm_b
|
|
||||||
#define bli_cntl_sub_unpackm_b11( cntl ) cntl->sub_unpackm_b11
|
// -----------------------------------------------------------------------------
|
||||||
#define bli_cntl_sub_unpackm_c( cntl ) cntl->sub_unpackm_c
|
|
||||||
#define bli_cntl_sub_unpackm_c11( cntl ) cntl->sub_unpackm_c11
|
cntl_t* bli_unpackm_cntl_obj_create
|
||||||
|
(
|
||||||
|
void* var_func,
|
||||||
|
void* unpackm_var_func,
|
||||||
|
cntl_t* sub_node
|
||||||
|
);
|
||||||
|
|
||||||
void bli_unpackm_cntl_init( void );
|
|
||||||
void bli_unpackm_cntl_finalize( void );
|
|
||||||
unpackm_t* bli_unpackm_cntl_obj_create( impl_t impl_type,
|
|
||||||
varnum_t var_num,
|
|
||||||
blksz_t* b );
|
|
||||||
void bli_unpackm_cntl_obj_init( unpackm_t* cntl,
|
|
||||||
impl_t impl_type,
|
|
||||||
varnum_t var_num,
|
|
||||||
blksz_t* b );
|
|
||||||
|
|||||||
@@ -152,15 +152,16 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
|||||||
#undef GENTFUNC
|
#undef GENTFUNC
|
||||||
#define GENTFUNC( ctype, ch, opname ) \
|
#define GENTFUNC( ctype, ch, opname ) \
|
||||||
\
|
\
|
||||||
void PASTEMAC(ch,opname)( \
|
void PASTEMAC(ch,opname) \
|
||||||
conj_t conjp, \
|
( \
|
||||||
dim_t m, \
|
conj_t conjp, \
|
||||||
dim_t n, \
|
dim_t m, \
|
||||||
void* beta, \
|
dim_t n, \
|
||||||
void* p, inc_t ldp, \
|
void* beta, \
|
||||||
void* a, inc_t inca, inc_t lda, \
|
void* p, inc_t ldp, \
|
||||||
cntx_t* cntx \
|
void* a, inc_t inca, inc_t lda, \
|
||||||
) \
|
cntx_t* cntx \
|
||||||
|
) \
|
||||||
{ \
|
{ \
|
||||||
dim_t panel_dim; \
|
dim_t panel_dim; \
|
||||||
num_t dt; \
|
num_t dt; \
|
||||||
|
|||||||
@@ -34,188 +34,43 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
#define FUNCPTR_T unpackm_fp
|
void bli_unpackm_int
|
||||||
|
(
|
||||||
typedef void (*FUNCPTR_T)( obj_t* p,
|
obj_t* p,
|
||||||
obj_t* a,
|
obj_t* a,
|
||||||
cntx_t* cntx,
|
cntx_t* cntx,
|
||||||
unpackm_t* cntl );
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
static FUNCPTR_T vars[2][3] =
|
)
|
||||||
{
|
{
|
||||||
// unblocked optimized unblocked blocked
|
unpackm_voft f;
|
||||||
{ bli_unpackm_unb_var1, NULL, NULL, },
|
|
||||||
{ NULL, NULL, bli_unpackm_blk_var2, },
|
|
||||||
};
|
|
||||||
|
|
||||||
void bli_unpackm_int( obj_t* p,
|
// Check parameters.
|
||||||
obj_t* a,
|
if ( bli_error_checking_is_enabled() )
|
||||||
cntx_t* cntx,
|
bli_unpackm_int_check( p, a, cntx );
|
||||||
unpackm_t* cntl,
|
|
||||||
thrinfo_t* thread )
|
|
||||||
{
|
|
||||||
// The unpackm operation consists of an optional post-process: castm.
|
|
||||||
// (This post-process is analogous to the castm pre-process in packm.)
|
|
||||||
// Here are the following possible ways unpackm can execute:
|
|
||||||
// 1. unpack and cast: Unpack to a temporary matrix c and then cast
|
|
||||||
// c to a.
|
|
||||||
// 2. unpack only: Unpack directly to matrix a since typecasting is
|
|
||||||
// not needed.
|
|
||||||
// 3. cast only: Not yet supported / not used.
|
|
||||||
// 4. no-op: The control tree directs us to skip the unpack operation
|
|
||||||
// entirely. No action is taken.
|
|
||||||
|
|
||||||
obj_t c;
|
|
||||||
|
|
||||||
varnum_t n;
|
|
||||||
impl_t i;
|
|
||||||
FUNCPTR_T f;
|
|
||||||
|
|
||||||
// Sanity check; A should never have a zero dimension. If we must support
|
|
||||||
// it, then we should fold it into the next alias-and-early-exit block.
|
|
||||||
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
|
|
||||||
|
|
||||||
// First check if we are to skip this operation because the control tree
|
|
||||||
// is NULL, and if so, simply return.
|
|
||||||
if ( bli_cntl_is_noop( cntl ) )
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If p was aliased to a during the pack stage (because it was already
|
// If p was aliased to a during the pack stage (because it was already
|
||||||
// in an acceptable packed/contiguous format), then no unpack is actually
|
// in an acceptable packed/contiguous format), then no unpack is actually
|
||||||
// necessary, so we return.
|
// necessary, so we return.
|
||||||
if ( bli_obj_is_alias_of( *p, *a ) )
|
if ( bli_obj_is_alias_of( *p, *a ) ) return;
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check parameters.
|
// Extract the function pointer from the current control tree node.
|
||||||
if ( bli_error_checking_is_enabled() )
|
f = bli_cntl_unpackm_params_var_func( cntl );
|
||||||
bli_unpackm_check( p, a, cntx, cntl );
|
|
||||||
|
|
||||||
// Now, if we are not skipping the unpack operation, then the only
|
|
||||||
// question left is whether we are to typecast matrix a after unpacking.
|
|
||||||
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
|
|
||||||
bli_abort();
|
|
||||||
/*
|
|
||||||
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
|
|
||||||
{
|
|
||||||
// Initialize an object c for the intermediate typecast matrix.
|
|
||||||
bli_unpackm_init_cast( p,
|
|
||||||
a,
|
|
||||||
&c );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
// If no cast is needed, then aliasing object c to the original
|
|
||||||
// matrix serves as a minor optimization. This causes the unpackm
|
|
||||||
// implementation to unpack directly into matrix a.
|
|
||||||
bli_obj_alias_to( *a, c );
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now we are ready to proceed with the unpacking.
|
|
||||||
|
|
||||||
// Extract the variant number and implementation type.
|
|
||||||
n = bli_cntl_var_num( cntl );
|
|
||||||
i = bli_cntl_impl_type( cntl );
|
|
||||||
|
|
||||||
// Index into the variant array to extract the correct function pointer.
|
|
||||||
f = vars[n][i];
|
|
||||||
|
|
||||||
// Invoke the variant.
|
// Invoke the variant.
|
||||||
if( bli_thread_am_ochief( thread ) ) {
|
if ( bli_thread_am_ochief( thread ) )
|
||||||
f( p,
|
|
||||||
&c,
|
|
||||||
cntx,
|
|
||||||
cntl );
|
|
||||||
}
|
|
||||||
bli_thread_obarrier( thread );
|
|
||||||
|
|
||||||
// Now, if necessary, we cast the contents of c to matrix a. If casting
|
|
||||||
// was not necessary, then we are done because the call to the unpackm
|
|
||||||
// implementation would have unpacked directly to matrix a.
|
|
||||||
/*
|
|
||||||
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
|
|
||||||
{
|
{
|
||||||
// Copy/typecast matrix c to matrix a.
|
f
|
||||||
// NOTE: Here, we use copynzm instead of copym because, in the cases
|
(
|
||||||
// where we are unpacking/typecasting a real matrix c to a complex
|
p,
|
||||||
// matrix a, we want to touch only the real components of a, rather
|
a,
|
||||||
// than also set the imaginary components to zero. This comes about
|
cntx,
|
||||||
// because of the fact that, if we are unpacking real-to-complex,
|
cntl,
|
||||||
// then it is because all of the computation occurred in the real
|
thread
|
||||||
// domain, and so we would want to leave whatever imaginary values
|
);
|
||||||
// there are in matrix a untouched. Notice that for unpackings that
|
}
|
||||||
// entail complex-to-complex data movements, the copynzm operation
|
|
||||||
// behaves exactly as copym, so no use cases are lost (at least none
|
|
||||||
// that I can think of).
|
|
||||||
bli_copynzm( &c,
|
|
||||||
a );
|
|
||||||
|
|
||||||
// NOTE: The above code/comment is outdated. What should happen is
|
// Barrier so that unpacking is done before computation.
|
||||||
// as follows:
|
bli_thread_obarrier( thread );
|
||||||
// - If dt(a) is complex and dt(p) is real, then create an alias of
|
|
||||||
// a and then tweak it so that it looks like a real domain object.
|
|
||||||
// This will involve:
|
|
||||||
// - projecting the datatype to real domain
|
|
||||||
// - scaling both the row and column strides by 2
|
|
||||||
// ALL OF THIS should be done in the front-end, NOT here, as
|
|
||||||
// unpackm() won't even be needed in that case.
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
void bli_unpackm_init_cast( obj_t* p,
|
|
||||||
obj_t* a,
|
|
||||||
obj_t* c )
|
|
||||||
{
|
|
||||||
// The idea here is that we want to create an object c that is identical
|
|
||||||
// to object a, except that:
|
|
||||||
// (1) the storage datatype of c is equal to the target datatype of a,
|
|
||||||
// with the element size of c adjusted accordingly,
|
|
||||||
// (2) the view offset of c is reset to (0,0),
|
|
||||||
// (3) object c's main buffer is set to a new memory region acquired
|
|
||||||
// from the memory manager, or extracted from p if a mem entry is
|
|
||||||
// already available, (After acquring a mem entry from the memory
|
|
||||||
// manager, it is cached within p for quick access later on.)
|
|
||||||
// (4) object c is marked as being stored in a standard, contiguous
|
|
||||||
// format (ie: column-major order).
|
|
||||||
// Any transposition encoded within object a will also be encoded in
|
|
||||||
// object c. That way, unpackm handles any needed transposition during
|
|
||||||
// the unpacking, and the only thing the cast stage needs to do is cast.
|
|
||||||
|
|
||||||
num_t dt_targ_a = bli_obj_target_datatype( *a );
|
|
||||||
dim_t m_a = bli_obj_length( *a );
|
|
||||||
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
|
|
||||||
|
|
||||||
inc_t rs_c, cs_c;
|
|
||||||
|
|
||||||
// We begin by copying the basic fields of a.
|
|
||||||
bli_obj_alias_to( *a, *c );
|
|
||||||
|
|
||||||
// Update datatype and element size fields.
|
|
||||||
bli_obj_set_datatype( dt_targ_a, *c );
|
|
||||||
bli_obj_set_elem_size( elem_size_c, *c );
|
|
||||||
|
|
||||||
// Reset the view offsets to (0,0).
|
|
||||||
bli_obj_set_offs( 0, 0, *c );
|
|
||||||
|
|
||||||
// Check the mem_t entry of p associated with the cast buffer. If it is
|
|
||||||
// NULL, then acquire memory sufficient to hold the object data and cache
|
|
||||||
// it to p. (Otherwise, if it is non-NULL, then memory has already been
|
|
||||||
// acquired from the memory manager and cached.) We then set the main
|
|
||||||
// buffer of c to the cached address of the cast memory.
|
|
||||||
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
|
|
||||||
|
|
||||||
// Update the strides. We set the increments to reflect column-major order
|
|
||||||
// storage. We start the leading dimension out as m(a) and increment it if
|
|
||||||
// necessary so that the beginning of each column is aligned.
|
|
||||||
cs_c = bli_align_dim_to_size( m_a, elem_size_c,
|
|
||||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
|
||||||
rs_c = 1;
|
|
||||||
bli_obj_set_strides( rs_c, cs_c, *c );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|||||||
@@ -32,14 +32,12 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_unpackm_int( obj_t* p,
|
void bli_unpackm_int
|
||||||
obj_t* a,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* p,
|
||||||
unpackm_t* cntl,
|
obj_t* a,
|
||||||
thrinfo_t* thread );
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
|
);
|
||||||
|
|
||||||
/*
|
|
||||||
void bli_unpackm_init_cast( obj_t* p,
|
|
||||||
obj_t* a,
|
|
||||||
obj_t* c );
|
|
||||||
*/
|
|
||||||
|
|||||||
@@ -50,10 +50,14 @@ typedef void (*FUNCPTR_T)(
|
|||||||
static FUNCPTR_T GENARRAY(ftypes,unpackm_unb_var1);
|
static FUNCPTR_T GENARRAY(ftypes,unpackm_unb_var1);
|
||||||
|
|
||||||
|
|
||||||
void bli_unpackm_unb_var1( obj_t* p,
|
void bli_unpackm_unb_var1
|
||||||
obj_t* c,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* p,
|
||||||
unpackm_t* cntl )
|
obj_t* c,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
|
)
|
||||||
{
|
{
|
||||||
num_t dt_pc = bli_obj_datatype( *p );
|
num_t dt_pc = bli_obj_datatype( *p );
|
||||||
|
|
||||||
|
|||||||
@@ -32,10 +32,14 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void bli_unpackm_unb_var1( obj_t* p,
|
void bli_unpackm_unb_var1
|
||||||
obj_t* c,
|
(
|
||||||
cntx_t* cntx,
|
obj_t* p,
|
||||||
unpackm_t* cntl );
|
obj_t* c,
|
||||||
|
cntx_t* cntx,
|
||||||
|
cntl_t* cntl,
|
||||||
|
thrinfo_t* thread
|
||||||
|
);
|
||||||
|
|
||||||
#undef GENTPROT
|
#undef GENTPROT
|
||||||
#define GENTPROT( ctype, ch, varname ) \
|
#define GENTPROT( ctype, ch, varname ) \
|
||||||
|
|||||||
@@ -32,9 +32,10 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "bli_gemv_cntl.h"
|
// NOTE: level-2 control tree code is temporarily disabled.
|
||||||
#include "bli_gemv_front.h"
|
//#include "bli_gemv_cntl.h"
|
||||||
#include "bli_gemv_int.h"
|
//#include "bli_gemv_front.h"
|
||||||
|
//#include "bli_gemv_int.h"
|
||||||
|
|
||||||
#include "bli_gemv_var.h"
|
#include "bli_gemv_var.h"
|
||||||
|
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ void PASTEMAC0(opname) \
|
|||||||
obj_t* beta, \
|
obj_t* beta, \
|
||||||
obj_t* y, \
|
obj_t* y, \
|
||||||
cntx_t* cntx, \
|
cntx_t* cntx, \
|
||||||
gemv_t* cntl \
|
cntl_t* cntl \
|
||||||
);
|
);
|
||||||
|
|
||||||
GENPROT( gemv_blk_var1 )
|
GENPROT( gemv_blk_var1 )
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ void PASTEMAC0(opname) \
|
|||||||
obj_t* beta, \
|
obj_t* beta, \
|
||||||
obj_t* y, \
|
obj_t* y, \
|
||||||
cntx_t* cntx, \
|
cntx_t* cntx, \
|
||||||
gemv_t* cntl \
|
cntl_t* cntl \
|
||||||
) \
|
) \
|
||||||
{ \
|
{ \
|
||||||
num_t dt = bli_obj_datatype( *a ); \
|
num_t dt = bli_obj_datatype( *a ); \
|
||||||
|
|||||||
@@ -34,43 +34,64 @@
|
|||||||
|
|
||||||
#include "blis.h"
|
#include "blis.h"
|
||||||
|
|
||||||
void bli_trsm_cntx_init( cntx_t* cntx )
|
#undef GENFRONT
|
||||||
{
|
#define GENFRONT( ftname, opname ) \
|
||||||
// Perform basic setup on the context.
|
\
|
||||||
bli_cntx_obj_create( cntx );
|
/*static gemv_vft GENARRAY(ftypes,gemv_unb_var1);*/ \
|
||||||
|
static GENARRAY_VFP(ftname,opname); \
|
||||||
|
\
|
||||||
|
void PASTEMAC0(opname) \
|
||||||
|
( \
|
||||||
|
obj_t* alpha, \
|
||||||
|
obj_t* a, \
|
||||||
|
obj_t* x, \
|
||||||
|
obj_t* beta, \
|
||||||
|
obj_t* y, \
|
||||||
|
cntx_t* cntx, \
|
||||||
|
gemv_t* cntl \
|
||||||
|
) \
|
||||||
|
{ \
|
||||||
|
num_t dt = bli_obj_datatype( *a ); \
|
||||||
|
\
|
||||||
|
trans_t transa = bli_obj_conjtrans_status( *a ); \
|
||||||
|
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||||
|
\
|
||||||
|
dim_t m = bli_obj_length( *a ); \
|
||||||
|
dim_t n = bli_obj_width( *a ); \
|
||||||
|
\
|
||||||
|
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||||
|
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||||
|
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||||
|
\
|
||||||
|
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||||
|
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||||
|
\
|
||||||
|
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||||
|
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||||
|
\
|
||||||
|
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||||
|
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
|
||||||
|
\
|
||||||
|
PASTECH(ftname,_vft) f = PASTECH(opname,_vfp)[dt]; \
|
||||||
|
\
|
||||||
|
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||||
|
f( \
|
||||||
|
transa, \
|
||||||
|
conjx, \
|
||||||
|
m, \
|
||||||
|
n, \
|
||||||
|
buf_alpha, \
|
||||||
|
buf_a, rs_a, cs_a, \
|
||||||
|
buf_x, incx, \
|
||||||
|
buf_beta, \
|
||||||
|
buf_y, incy, \
|
||||||
|
cntx \
|
||||||
|
); \
|
||||||
|
} \
|
||||||
|
|
||||||
// Initialize the context with the current architecture's native
|
GENFRONT( gemv, gemv_unb_var1 )
|
||||||
// level-3 gemm micro-kernel, and its output preferences.
|
GENFRONT( gemv, gemv_unb_var2 )
|
||||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMM_UKR, cntx );
|
|
||||||
bli_gks_cntx_set_l3_nat_ukr_prefs( BLIS_GEMM_UKR, cntx );
|
|
||||||
|
|
||||||
// Initialize the context with the current architecture's native
|
GENFRONT( gemv, gemv_unf_var1 )
|
||||||
// level-3 trsm micro-kernels.
|
GENFRONT( gemv, gemv_unf_var2 )
|
||||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_L_UKR, cntx );
|
|
||||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_U_UKR, cntx );
|
|
||||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_TRSM_L_UKR, cntx );
|
|
||||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_TRSM_U_UKR, cntx );
|
|
||||||
|
|
||||||
// Initialize the context with the current architecture's register
|
|
||||||
// and cache blocksizes (and multiples), given the execution method.
|
|
||||||
bli_gks_cntx_set_blkszs( BLIS_NAT, 6,
|
|
||||||
BLIS_NC, BLIS_NR,
|
|
||||||
BLIS_KC, BLIS_KR,
|
|
||||||
BLIS_MC, BLIS_MR,
|
|
||||||
BLIS_NR, BLIS_NR,
|
|
||||||
BLIS_MR, BLIS_MR,
|
|
||||||
BLIS_KR, BLIS_KR,
|
|
||||||
cntx );
|
|
||||||
|
|
||||||
// Set the pack_t schemas for native execution.
|
|
||||||
bli_cntx_set_pack_schema_ab( BLIS_PACKED_ROW_PANELS,
|
|
||||||
BLIS_PACKED_COL_PANELS,
|
|
||||||
cntx );
|
|
||||||
}
|
|
||||||
|
|
||||||
void bli_trsm_cntx_finalize( cntx_t* cntx )
|
|
||||||
{
|
|
||||||
// Free the context and all memory allocated to it.
|
|
||||||
bli_cntx_obj_free( cntx );
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -32,8 +32,9 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "bli_ger_cntl.h"
|
// NOTE: level-2 control tree code is temporarily disabled.
|
||||||
#include "bli_ger_front.h"
|
//#include "bli_ger_cntl.h"
|
||||||
#include "bli_ger_int.h"
|
//#include "bli_ger_front.h"
|
||||||
|
//#include "bli_ger_int.h"
|
||||||
|
|
||||||
#include "bli_ger_var.h"
|
#include "bli_ger_var.h"
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ void PASTEMAC0(opname) \
|
|||||||
obj_t* y, \
|
obj_t* y, \
|
||||||
obj_t* a, \
|
obj_t* a, \
|
||||||
cntx_t* cntx, \
|
cntx_t* cntx, \
|
||||||
ger_t* cntl \
|
cntl_t* cntl \
|
||||||
);
|
);
|
||||||
|
|
||||||
GENPROT( ger_blk_var1 )
|
GENPROT( ger_blk_var1 )
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ void PASTEMAC0(opname) \
|
|||||||
obj_t* y, \
|
obj_t* y, \
|
||||||
obj_t* a, \
|
obj_t* a, \
|
||||||
cntx_t* cntx, \
|
cntx_t* cntx, \
|
||||||
ger_t* cntl \
|
cntl_t* cntl \
|
||||||
) \
|
) \
|
||||||
{ \
|
{ \
|
||||||
num_t dt = bli_obj_datatype( *a ); \
|
num_t dt = bli_obj_datatype( *a ); \
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user