conflicts merge for bli_kernel.h

Change-Id: I15d846bd34e11f86ebfd7ed091ff671a1f3366a0
This commit is contained in:
praveeng
2016-10-06 12:35:30 +05:30
371 changed files with 8355 additions and 8283 deletions

View File

@@ -7,16 +7,17 @@ Introduction
------------ ------------
BLIS is a portable software framework for instantiating high-performance BLIS is a portable software framework for instantiating high-performance
BLAS-like dense linear algebra libraries. The framework was designed to BLAS-like dense linear algebra libraries. The framework was designed to isolate
isolate essential kernels of computation that, when optimized, immediately essential kernels of computation that, when optimized, immediately enable
enable optimized implementations of most of its commonly used and optimized implementations of most of its commonly used and computationally
computationally intensive operations. BLIS is written in [ISO intensive operations. BLIS is written in [ISO
C99](http://en.wikipedia.org/wiki/C99) and available under a C99](http://en.wikipedia.org/wiki/C99) and available under a
[new/modified/3-clause BSD [new/modified/3-clause BSD
license](http://opensource.org/licenses/BSD-3-Clause). While BLIS exports a license](http://opensource.org/licenses/BSD-3-Clause). While BLIS exports a
[new BLAS-like API](), it also includes a BLAS compatibility layer which gives [new BLAS-like API](https://github.com/flame/blis/wiki/BLISAPIQuickReference),
application developers access to BLIS implementations via traditional [BLAS it also includes a BLAS compatibility layer which gives application developers
routine calls](http://www.netlib.org/lapack/lug/node145.html). access to BLIS implementations via traditional [BLAS routine
calls](http://www.netlib.org/lapack/lug/node145.html).
For a thorough presentation of our framework, please read our recently accepted For a thorough presentation of our framework, please read our recently accepted
journal article, ["BLIS: A Framework for Rapidly Instantiating BLAS journal article, ["BLIS: A Framework for Rapidly Instantiating BLAS

View File

@@ -125,6 +125,18 @@
#define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS #define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif #endif
// zgemm micro-kernel
// Selects the double-complex gemm micro-kernel and its default blocksizes.
// The "#if 1" guard keeps this configuration unconditionally enabled.
// Note that MC (72) is a multiple of MR (3) and NC (4080) is a multiple of
// NR (4).
#if 1
#define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4
#define BLIS_DEFAULT_MC_Z 72
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 4080
#define BLIS_DEFAULT_MR_Z 3
#define BLIS_DEFAULT_NR_Z 4
// NOTE(review): presumably tells the framework that this micro-kernel
// prefers row-contiguous output storage -- confirm against where the macro
// is consumed.
#define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif
// -- trsm-related -- // -- trsm-related --

9
configure vendored
View File

@@ -91,7 +91,7 @@ print_usage()
echo " -t MODEL, --enable-threading[=MODEL], --disable-threading" echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
echo " " echo " "
echo " Enable threading in the library, using threading model" echo " Enable threading in the library, using threading model"
echo " MODEL={omp,pthreads,no}. If MODEL=no or " echo " MODEL={openmp,pthreads,no}. If MODEL=no or "
echo " --disable-threading is specified, threading will be" echo " --disable-threading is specified, threading will be"
echo " disabled. The default is 'no'." echo " disabled. The default is 'no'."
echo " " echo " "
@@ -486,13 +486,18 @@ main()
# Check the threading model flag. # Check the threading model flag.
# NOTE: 'omp' is deprecated but still supported; 'openmp' is preferred.
enable_openmp='no' enable_openmp='no'
enable_openmp_01=0 enable_openmp_01=0
enable_pthreads='no' enable_pthreads='no'
enable_pthreads_01=0 enable_pthreads_01=0
if [ "x${threading_model}" = "xauto" ]; then if [ "x${threading_model}" = "xauto" ]; then
echo "${script_name}: determining the threading model automatically." echo "${script_name}: determining the threading model automatically."
elif [ "x${threading_model}" = "xomp" ]; then elif [ "x${threading_model}" = "xopenmp" ] ||
[ "x${threading_model}" = "xomp" ]; then
echo "${script_name}: using OpenMP for threading." echo "${script_name}: using OpenMP for threading."
enable_openmp='yes' enable_openmp='yes'
enable_openmp_01=1 enable_openmp_01=1

View File

@@ -99,8 +99,8 @@ void bli_getsc_check
// Check object datatypes. // Check object datatypes.
e_val = bli_check_noninteger_object( chi ); //e_val = bli_check_noninteger_object( chi );
bli_check_error_code( e_val ); //bli_check_error_code( e_val );
// Check object dimensions. // Check object dimensions.
@@ -125,8 +125,8 @@ void bli_setsc_check
// Check object datatypes. // Check object datatypes.
e_val = bli_check_floating_object( chi ); //e_val = bli_check_floating_object( chi );
bli_check_error_code( e_val ); //bli_check_error_code( e_val );
// Check object dimensions. // Check object dimensions.

View File

@@ -198,8 +198,8 @@ void PASTEMAC0(opname) \
if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; \ if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; \
else dt_use = dt_chi; \ else dt_use = dt_chi; \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function (with integer support). */ \
bli_call_ft_3 \ bli_call_ft_3i \
( \ ( \
dt_use, \ dt_use, \
opname, \ opname, \
@@ -229,8 +229,8 @@ void PASTEMAC0(opname) \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \ PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function (with integer support). */ \
bli_call_ft_3 \ bli_call_ft_3i \
( \ ( \
dt_chi, \ dt_chi, \
opname, \ opname, \

View File

@@ -227,3 +227,25 @@ void PASTEMAC(ch,opname) \
INSERT_GENTFUNCR_BASIC0( zipsc ) INSERT_GENTFUNCR_BASIC0( zipsc )
// -----------------------------------------------------------------------------
// Integer-typed getsc: reads the integer scalar pointed to by chi and writes
// its value into the double-precision outputs *zeta_r and *zeta_i using the
// mixed-type (integer -> double) "gets" scalar macro.
// NOTE(review): the exact value stored in *zeta_i is determined by the gets
// macro, which is defined elsewhere -- presumably zero for an integer source.
void bli_igetsc
(
dim_t* chi,
double* zeta_r,
double* zeta_i
)
{
PASTEMAC2(i,d,gets)( *chi, *zeta_r, *zeta_i );
}
// Integer-typed setsc: stores the double-precision scalar (zeta_r, zeta_i)
// into the integer pointed to by chi using the mixed-type (double -> integer)
// "sets" scalar macro.
// NOTE(review): how zeta_i and any fractional part of zeta_r are handled is
// determined by the sets macro, which is defined elsewhere -- confirm there.
void bli_isetsc
(
double zeta_r,
double zeta_i,
dim_t* chi
)
{
PASTEMAC2(d,i,sets)( zeta_r, zeta_i, *chi );
}

View File

@@ -141,3 +141,19 @@ void PASTEMAC(ch,opname) \
INSERT_GENTPROTR_BASIC( zipsc ) INSERT_GENTPROTR_BASIC( zipsc )
// -----------------------------------------------------------------------------
// Integer-typed getsc: reads the integer scalar *chi and writes it to the
// double-precision outputs *zeta_r and *zeta_i.
void bli_igetsc
(
dim_t* chi,
double* zeta_r,
double* zeta_i
);
// Integer-typed setsc: stores the double-precision scalar (zeta_r, zeta_i)
// into the integer *chi.
void bli_isetsc
(
double zeta_r,
double zeta_i,
dim_t* chi
);

View File

@@ -46,12 +46,14 @@
#include "bli_l1v_tapi.h" #include "bli_l1v_tapi.h"
// Pack-related // Pack-related
#include "bli_packv.h" // NOTE: packv and unpackv are temporarily disabled.
#include "bli_unpackv.h" //#include "bli_packv.h"
//#include "bli_unpackv.h"
// Other // Other
#include "bli_scalv_cntl.h" // NOTE: scalv control tree code is temporarily disabled.
#include "bli_scalv_int.h" //#include "bli_scalv_cntl.h"
//#include "bli_scalv_int.h"
// Reference kernel headers // Reference kernel headers
#include "bli_l1v_ref.h" #include "bli_l1v_ref.h"

View File

@@ -56,6 +56,21 @@ GENFRONT( subv )
GENFRONT( swapv ) GENFRONT( swapv )
#undef GENFRONT
// Generates the object-level <opname>_check() function for operations that
// take a vector x and an index object. All validation is delegated to
// bli_l1v_xi_check(). Instantiated below for amaxv.
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* x, \
obj_t* index \
) \
{ \
bli_l1v_xi_check( x, index ); \
}
GENFRONT( amaxv )
#undef GENFRONT #undef GENFRONT
#define GENFRONT( opname ) \ #define GENFRONT( opname ) \
\ \
@@ -481,3 +496,39 @@ void bli_l1v_ax_check
bli_check_error_code( e_val ); bli_check_error_code( e_val );
} }
// Validates the operands of a level-1v operation that takes a vector x and
// an index object (used by the amaxv check). Each check's result is passed
// to bli_check_error_code() immediately, so checks are evaluated in the order
// written: datatypes first, then dimensions, then buffers.
//
//   x     - must be a floating-point vector object with a valid buffer.
//   index - must be a non-constant integer scalar object with a valid buffer.
void bli_l1v_xi_check
(
obj_t* x,
obj_t* index
)
{
err_t e_val;
// Check object datatypes.
e_val = bli_check_floating_object( x );
bli_check_error_code( e_val );
e_val = bli_check_integer_object( index );
bli_check_error_code( e_val );
// The index object receives the result, so it may not be a constant object.
e_val = bli_check_nonconstant_object( index );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_vector_object( x );
bli_check_error_code( e_val );
e_val = bli_check_scalar_object( index );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
e_val = bli_check_object_buffer( x );
bli_check_error_code( e_val );
e_val = bli_check_object_buffer( index );
bli_check_error_code( e_val );
}

View File

@@ -44,7 +44,7 @@ void PASTEMAC(opname,_check) \
( \ ( \
obj_t* x, \ obj_t* x, \
obj_t* y \ obj_t* y \
); );
GENTPROT( addv ) GENTPROT( addv )
GENTPROT( copyv ) GENTPROT( copyv )
@@ -52,6 +52,18 @@ GENTPROT( subv )
GENTPROT( swapv ) GENTPROT( swapv )
#undef GENTPROT
// Prototypes the object-level <opname>_check() function for operations that
// take a vector x and an index object. Instantiated below for amaxv.
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* x, \
obj_t* index \
);
GENTPROT( amaxv )
#undef GENTPROT #undef GENTPROT
#define GENTPROT( opname ) \ #define GENTPROT( opname ) \
\ \
@@ -74,7 +86,7 @@ void PASTEMAC(opname,_check) \
obj_t* alpha, \ obj_t* alpha, \
obj_t* x, \ obj_t* x, \
obj_t* y \ obj_t* y \
); );
GENTPROT( axpyv ) GENTPROT( axpyv )
GENTPROT( scal2v ) GENTPROT( scal2v )
@@ -88,7 +100,7 @@ void PASTEMAC(opname,_check) \
obj_t* x, \ obj_t* x, \
obj_t* y, \ obj_t* y, \
obj_t* rho \ obj_t* rho \
); );
GENTPROT( dotv ) GENTPROT( dotv )
@@ -103,7 +115,7 @@ void PASTEMAC(opname,_check) \
obj_t* y, \ obj_t* y, \
obj_t* beta, \ obj_t* beta, \
obj_t* rho \ obj_t* rho \
); );
GENTPROT( dotxv ) GENTPROT( dotxv )
@@ -114,7 +126,7 @@ GENTPROT( dotxv )
void PASTEMAC(opname,_check) \ void PASTEMAC(opname,_check) \
( \ ( \
obj_t* x \ obj_t* x \
); );
GENTPROT( invertv ) GENTPROT( invertv )
@@ -126,7 +138,7 @@ void PASTEMAC(opname,_check) \
( \ ( \
obj_t* alpha, \ obj_t* alpha, \
obj_t* x \ obj_t* x \
); );
GENTPROT( scalv ) GENTPROT( scalv )
GENTPROT( setv ) GENTPROT( setv )
@@ -196,3 +208,9 @@ void bli_l1v_ax_check
obj_t* x obj_t* x
); );
// Validates the operands of a level-1v operation that takes a vector x and
// an index object (datatypes, dimensions, and buffers).
void bli_l1v_xi_check
(
obj_t* x,
obj_t* index
);

View File

@@ -55,6 +55,7 @@ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
} }
GENFRONT( addv, BLIS_ADDV_KER ) GENFRONT( addv, BLIS_ADDV_KER )
GENFRONT( amaxv, BLIS_AMAXV_KER )
GENFRONT( copyv, BLIS_COPYV_KER ) GENFRONT( copyv, BLIS_COPYV_KER )
GENFRONT( dotv, BLIS_DOTV_KER ) GENFRONT( dotv, BLIS_DOTV_KER )
GENFRONT( dotxv, BLIS_DOTXV_KER ) GENFRONT( dotxv, BLIS_DOTXV_KER )

View File

@@ -44,6 +44,7 @@ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ); \
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ); void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx );
GENPROT( addv ) GENPROT( addv )
GENPROT( amaxv )
GENPROT( axpbyv ) GENPROT( axpbyv )
GENPROT( axpyv ) GENPROT( axpyv )
GENPROT( copyv ) GENPROT( copyv )

View File

@@ -58,6 +58,21 @@ INSERT_GENTDEF( addv )
INSERT_GENTDEF( copyv ) INSERT_GENTDEF( copyv )
INSERT_GENTDEF( subv ) INSERT_GENTDEF( subv )
// amaxv
// Function pointer type for typed amaxv implementations: takes a vector x of
// length n with stride incx, an output location for the resulting index, and
// a context. One typedef is generated per datatype by INSERT_GENTDEF().
#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
dim_t n, \
ctype* restrict x, inc_t incx, \
dim_t* restrict index, \
cntx_t* cntx \
);
INSERT_GENTDEF( amaxv )
// axpbyv // axpbyv
#undef GENTDEF #undef GENTDEF

View File

@@ -54,6 +54,20 @@ INSERT_GENTPROT_BASIC( copyv_ker_name )
INSERT_GENTPROT_BASIC( subv_ker_name ) INSERT_GENTPROT_BASIC( subv_ker_name )
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
( \
dim_t n, \
ctype* restrict x, inc_t incx, \
dim_t* restrict index, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( amaxv_ker_name )
#undef GENTPROT #undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \ #define GENTPROT( ctype, ch, opname ) \
\ \

View File

@@ -82,6 +82,44 @@ GENFRONT( copyv )
GENFRONT( subv ) GENFRONT( subv )
#undef GENFRONT
// Generates the object-based API for operations that take a vector x and an
// index object (instantiated below for amaxv). The generated function
// extracts the datatype, length, buffer, and stride of x and the buffer of
// index, optionally runs the operation's _check() function, and then
// dispatches to the typed implementation selected by the datatype of x.
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
obj_t* x, \
obj_t* index \
BLIS_OAPI_CNTX_PARAM \
) \
{ \
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
\
void* buf_index = bli_obj_buffer_at_off( *index ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, index ); \
\
/* Invoke the typed function. */ \
bli_call_ft_5 \
( \
dt, \
opname, \
n, \
buf_x, incx, \
buf_index, \
cntx \
); \
}
GENFRONT( amaxv )
#undef GENFRONT #undef GENFRONT
#define GENFRONT( opname ) \ #define GENFRONT( opname ) \
\ \

View File

@@ -52,6 +52,19 @@ GENTPROT( copyv )
GENTPROT( subv ) GENTPROT( subv )
#undef GENTPROT
// Prototypes the object-based API for operations that take a vector x and an
// index object. BLIS_OAPI_CNTX_PARAM supplies the optional trailing context
// parameter. Instantiated below for amaxv.
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
obj_t* x, \
obj_t* index \
BLIS_OAPI_CNTX_PARAM \
);
GENTPROT( amaxv )
#undef GENTPROT #undef GENTPROT
#define GENTPROT( opname ) \ #define GENTPROT( opname ) \
\ \

View File

@@ -74,6 +74,38 @@ INSERT_GENTFUNC_BASIC( copyv, BLIS_COPYV_KER )
INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER ) INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER )
#undef GENTFUNC
// Generates the typed API for amaxv. The generated function sets up the
// context pointer cntx_p via bli_cntx_init_local_if(), queries that context
// for the level-1v kernel identified by kerid for this datatype, invokes the
// kernel, and finally calls bli_cntx_finalize_local_if().
// NOTE(review): presumably a local context is only created (and finalized)
// when the caller did not supply one -- confirm against those macros.
#define GENTFUNC( ctype, ch, opname, kerid ) \
\
void PASTEMAC(ch,opname) \
( \
dim_t n, \
ctype* x, inc_t incx, \
dim_t* index, \
cntx_t* cntx \
) \
{ \
const num_t dt = PASTEMAC(ch,type); \
cntx_t* cntx_p; \
\
bli_cntx_init_local_if( opname, cntx, cntx_p ); \
\
PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \
\
f \
( \
n, \
x, incx, \
index, \
cntx_p \
); \
\
bli_cntx_finalize_local_if( opname, cntx ); \
}
INSERT_GENTFUNC_BASIC( amaxv, BLIS_AMAXV_KER )
#undef GENTFUNC #undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kerid ) \ #define GENTFUNC( ctype, ch, opname, kerid ) \
\ \

View File

@@ -40,6 +40,9 @@
#undef addv_ker_name #undef addv_ker_name
#define addv_ker_name addv #define addv_ker_name addv
// Name substituted wherever amaxv_ker_name is used as the operation name
// (e.g. when instantiating the amaxv kernel prototypes).
#undef amaxv_ker_name
#define amaxv_ker_name amaxv
#undef axpbyv_ker_name #undef axpbyv_ker_name
#define axpbyv_ker_name axpbyv #define axpbyv_ker_name axpbyv

View File

@@ -0,0 +1,134 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
//
// Define BLAS-like interfaces with typed operands.
//
#undef GENTFUNCR
// Reference amaxv kernel. For a vector x of length n and stride incx, finds
// the index of the element whose |real| + |imag| is largest and stores it in
// *i_max. The index is initialized to zero, so a zero-length vector yields 0.
// Because the comparison is strict, the first of several equal maxima is the
// one that is kept. The unit-stride and general-stride cases are handled by
// separate but otherwise identical loops.
#define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname) \
( \
dim_t n, \
ctype* x, inc_t incx, \
dim_t* i_max, \
cntx_t* cntx \
) \
{ \
ctype_r* minus_one = PASTEMAC(chr,m1); \
dim_t* zero_i = PASTEMAC(i,0); \
\
ctype_r chi1_r; \
ctype_r chi1_i; \
ctype_r abs_chi1; \
ctype_r abs_chi1_max; \
dim_t i; \
\
/* Initialize the index of the maximum absolute value to zero. */ \
PASTEMAC(i,copys)( zero_i, *i_max ); \
\
/* If the vector length is zero, return early. This directly emulates
the behavior of netlib BLAS's i?amax() routines. */ \
if ( bli_zero_dim1( n ) ) return; \
\
/* Initialize the maximum absolute value search candidate with
-1, which is guaranteed to be less than all values we will
compute. */ \
PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \
\
if ( incx == 1 ) \
{ \
for ( i = 0; i < n; ++i ) \
{ \
/* Get the real and imaginary components of chi1. */ \
PASTEMAC2(ch,chr,gets)( x[i], chi1_r, chi1_i ); \
\
/* Replace chi1_r and chi1_i with their absolute values. */ \
PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
\
/* Add the real and imaginary absolute values together. */ \
PASTEMAC(chr,set0s)( abs_chi1 ); \
PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
\
/* If the absolute value of the current element exceeds that of
the previous largest, save it and its index. If NaN is
encountered, it is recorded unconditionally, replacing the
current candidate and its index. NOTE(review): once
abs_chi1_max holds NaN, later non-NaN elements presumably
never compare greater, so the index of the last NaN seen
would be returned -- confirm this is the intended NaN
policy (an earlier note attributed it to LAPACK's
?lange()). */ \
if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \
{ \
abs_chi1_max = abs_chi1; \
*i_max = i; \
} \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
ctype* chi1 = x + (i )*incx; \
\
/* Get the real and imaginary components of chi1. */ \
PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \
\
/* Replace chi1_r and chi1_i with their absolute values. */ \
PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
\
/* Add the real and imaginary absolute values together. */ \
PASTEMAC(chr,set0s)( abs_chi1 ); \
PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
\
/* If the absolute value of the current element exceeds that of
the previous largest, save it and its index. If NaN is
encountered, it is recorded unconditionally, replacing the
current candidate and its index. NOTE(review): once
abs_chi1_max holds NaN, later non-NaN elements presumably
never compare greater, so the index of the last NaN seen
would be returned -- confirm this is the intended NaN
policy (an earlier note attributed it to LAPACK's
?lange()). */ \
if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \
{ \
abs_chi1_max = abs_chi1; \
*i_max = i; \
} \
} \
} \
}
INSERT_GENTFUNCR_BASIC0( amaxv_ref )

View File

@@ -34,6 +34,7 @@
#include "blis.h" #include "blis.h"
#if 0
packv_t* packv_cntl = NULL; packv_t* packv_cntl = NULL;
void bli_packv_cntl_init( void ) void bli_packv_cntl_init( void )
@@ -77,4 +78,41 @@ void bli_packv_cntl_obj_init( packv_t* cntl,
cntl->bmid = bmid; cntl->bmid = bmid;
cntl->pack_schema = pack_schema; cntl->pack_schema = pack_schema;
} }
#endif
// Creates a control tree node for a packv operation.
//
//   var_func       - variant function stored in the control tree node itself.
//   packv_var_func - packv variant function stored in the node's parameters.
//   bmid           - blocksize id recorded in the parameters.
//   pack_schema    - pack schema recorded in the parameters.
//   sub_node       - child node attached beneath the new node.
//
// Returns the node produced by bli_cntl_obj_create(). The parameter struct is
// allocated here with bli_malloc_intl() and handed to the node.
cntl_t* bli_packv_cntl_obj_create
     (
       void*   var_func,
       void*   packv_var_func,
       bszid_t bmid,
       pack_t  pack_schema,
       cntl_t* sub_node
     )
{
	cntl_t*         cntl;
	packv_params_t* params;

	// Allocate a packv_params_t struct.
	params = bli_malloc_intl( sizeof( packv_params_t ) );

	// Initialize the packv_params_t struct. Note that the struct field is
	// named var_func (matching the bli_cntl_packv_params_var_func() accessor),
	// not packv_var_func.
	params->size        = sizeof( packv_params_t );
	params->var_func    = packv_var_func;
	params->bmid        = bmid;
	params->pack_schema = pack_schema;

	// It's important that we set the bszid field to BLIS_NO_PART to indicate
	// that no blocksize partitioning is performed. bli_cntl_free() will rely
	// on this information to know how to step through the thrinfo_t tree in
	// sync with the cntl_t tree.
	cntl = bli_cntl_obj_create
	(
	  BLIS_NO_PART,
	  var_func,
	  params,
	  sub_node
	);

	return cntl;
}

View File

@@ -0,0 +1,67 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
struct packv_params_s
{
uint64_t size
packv_voft* var_func;
bszid_t bmid;
pack_t pack_schema;
};
typedef struct packv_params_s packv_params_t;
// Returns the var_func field of the packv_params_t attached to a control tree
// node. The cast is parenthesized so that it applies to cntl->params before
// the member access; a cast binds less tightly than ->, so without the extra
// parentheses the member would be looked up on the uncast params pointer.
#define bli_cntl_packv_params_var_func( cntl ) \
\
( ( (packv_params_t*)(cntl)->params )->var_func )
// Returns the bmid field of the packv_params_t attached to a control tree
// node. The field is named bmid in packv_params_t (there is no bmid_m
// member), and the cast is parenthesized so that it applies to cntl->params
// before the member access.
#define bli_cntl_packv_params_bmid( cntl ) \
\
( ( (packv_params_t*)(cntl)->params )->bmid )
// Returns the pack_schema field of the packv_params_t attached to a control
// tree node. The cast is parenthesized so that it applies to cntl->params
// before the member access.
#define bli_cntl_packv_params_pack_schema( cntl ) \
\
( ( (packv_params_t*)(cntl)->params )->pack_schema )
// -----------------------------------------------------------------------------
// Creates a control tree node for a packv operation. var_func is the variant
// function for the node itself; packv_var_func, bmid, and pack_schema are
// recorded in the node's packv parameters; sub_node is attached as the child.
cntl_t* bli_packv_cntl_obj_create
(
void* var_func,
void* packv_var_func,
bszid_t bmid,
pack_t pack_schema,
cntl_t* sub_node
);

View File

@@ -52,7 +52,6 @@ void bli_packv_init
pack_t pack_schema; pack_t pack_schema;
bszid_t bmult_id; bszid_t bmult_id;
obj_t c;
// Check parameters. // Check parameters.
if ( bli_error_checking_is_enabled() ) if ( bli_error_checking_is_enabled() )
@@ -84,26 +83,6 @@ void bli_packv_init
// left is whether we are to typecast vector a before packing. // left is whether we are to typecast vector a before packing.
if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) ) if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
bli_abort(); bli_abort();
/*
{
// Initialize an object c for the intermediate typecast vector.
bli_packv_init_cast( a,
p,
&c );
// Copy/typecast vector a to vector c.
bli_copyv( a,
&c );
}
else
*/
{
// If no cast is needed, then aliasing object c to the original
// vector serves as a minor optimization. This causes the packv
// implementation to pack directly from vector a.
bli_obj_alias_to( *a, c );
}
// Extract various fields from the control tree and pass them in // Extract various fields from the control tree and pass them in
// explicitly into _init_pack(). This allows external code generators // explicitly into _init_pack(). This allows external code generators
@@ -116,7 +95,7 @@ void bli_packv_init
( (
pack_schema, pack_schema,
bmult_id, bmult_id,
&c, &a,
p, p,
cntx cntx
); );
@@ -125,22 +104,24 @@ void bli_packv_init
} }
void bli_packv_init_pack siz_t bli_packv_init_pack
( (
pack_t pack_schema, pack_t schema,
bszid_t bmult_id, bszid_t bmult_id,
obj_t* c, obj_t* a,
obj_t* p, obj_t* p,
cntx_t* cntx cntx_t* cntx
) )
{ {
num_t dt = bli_obj_datatype( *c ); num_t dt = bli_obj_datatype( *a );
dim_t dim_c = bli_obj_vector_dim( *c ); dim_t dim_a = bli_obj_vector_dim( *a );
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx ); dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
membrk_t* membrk = bli_cntx_membrk( cntx ); membrk_t* membrk = bli_cntx_membrk( cntx );
#if 0
mem_t* mem_p; mem_t* mem_p;
#endif
dim_t m_p_pad; dim_t m_p_pad;
siz_t size_p; siz_t size_p;
inc_t rs_p, cs_p; inc_t rs_p, cs_p;
@@ -148,21 +129,17 @@ void bli_packv_init_pack
// We begin by copying the basic fields of c. // We begin by copying the basic fields of c.
bli_obj_alias_to( *c, *p ); bli_obj_alias_to( *a, *p );
// Update the dimensions. // Update the dimensions.
bli_obj_set_dims( dim_c, 1, *p ); bli_obj_set_dims( dim_a, 1, *p );
// Reset the view offsets to (0,0). // Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *p ); bli_obj_set_offs( 0, 0, *p );
// Set the pack schema in the p object to the value in the control tree // Set the pack schema in the p object to the value in the control tree
// node. // node.
bli_obj_set_pack_schema( pack_schema, *p ); bli_obj_set_pack_schema( schema, *p );
// Extract the address of the mem_t object within p that will track
// properties of the packed buffer.
mem_p = bli_obj_pack_mem( *p );
// Compute the dimensions padded by the dimension multiples. // Compute the dimensions padded by the dimension multiples.
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( *p ), bmult ); m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( *p ), bmult );
@@ -170,6 +147,11 @@ void bli_packv_init_pack
// Compute the size of the packed buffer. // Compute the size of the packed buffer.
size_p = m_p_pad * 1 * bli_obj_elem_size( *p ); size_p = m_p_pad * 1 * bli_obj_elem_size( *p );
#if 0
// Extract the address of the mem_t object within p that will track
// properties of the packed buffer.
mem_p = bli_obj_pack_mem( *p );
if ( bli_mem_is_unalloc( mem_p ) ) if ( bli_mem_is_unalloc( mem_p ) )
{ {
// If the mem_t object of p has not yet been allocated, then acquire // If the mem_t object of p has not yet been allocated, then acquire
@@ -192,19 +174,19 @@ void bli_packv_init_pack
} }
} }
// Save the padded (packed) dimensions into the packed object.
bli_obj_set_padded_dims( m_p_pad, 1, *p );
// Grab the buffer address from the mem_t object and copy it to the // Grab the buffer address from the mem_t object and copy it to the
// main object buffer field. (Sometimes this buffer address will be // main object buffer field. (Sometimes this buffer address will be
// copied when the value is already up-to-date, because it persists // copied when the value is already up-to-date, because it persists
// in the main object buffer field across loop iterations.) // in the main object buffer field across loop iterations.)
buf = bli_mem_buffer( mem_p ); buf = bli_mem_buffer( mem_p );
bli_obj_set_buffer( buf, *p ); bli_obj_set_buffer( buf, *p );
#endif
// Save the padded (packed) dimensions into the packed object.
bli_obj_set_padded_dims( m_p_pad, 1, *p );
// Set the row and column strides of p based on the pack schema. // Set the row and column strides of p based on the pack schema.
if ( pack_schema == BLIS_PACKED_VECTOR ) if ( schema == BLIS_PACKED_VECTOR )
{ {
// Set the strides to reflect a column-stored vector. Note that the // Set the strides to reflect a column-stored vector. Note that the
// column stride may never be used, and is only useful to determine // column stride may never be used, and is only useful to determine
@@ -215,8 +197,11 @@ void bli_packv_init_pack
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, *p );
} }
return size_p;
} }
#if 0
void bli_packv_release void bli_packv_release
( (
obj_t* p, obj_t* p,
@@ -226,52 +211,4 @@ void bli_packv_release
if ( !bli_cntl_is_noop( cntl ) ) if ( !bli_cntl_is_noop( cntl ) )
bli_obj_release_pack( p ); bli_obj_release_pack( p );
} }
#endif
/*
void bli_packv_init_cast( obj_t* a,
obj_t* p,
obj_t* c )
{
// The idea here is that we want to create an object c that is identical
// to object a, except that:
// (1) the storage datatype of c is equal to the target datatype of a,
// with the element size of c adjusted accordingly,
// (2) object c is marked as being stored in a standard, contiguous
// format (ie: a column vector),
// (3) the view offset of c is reset to (0,0), and
// (4) object c's main buffer is set to a new memory region acquired
// from the memory manager, or extracted from p if a mem entry is
// already available. (After acquring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
num_t dt_targ_a = bli_obj_target_datatype( *a );
dim_t dim_a = bli_obj_vector_dim( *a );
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
// We begin by copying the basic fields of a.
bli_obj_alias_to( *a, *c );
// Update datatype and element size fields.
bli_obj_set_datatype( dt_targ_a, *c );
bli_obj_set_elem_size( elem_size_c, *c );
// Update the dimensions.
bli_obj_set_dims( dim_a, 1, *c );
// Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *c );
// Check the mem_t entry of p associated with the cast buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache
// it to p. (Otherwise, if it is non-NULL, then memory has already been
// acquired from the memory manager and cached.) We then set the main
// buffer of c to the cached address of the cast memory.
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
// Update the strides. We set the increments to reflect a column storage.
// Note that the column stride should never be used.
bli_obj_set_strides( 1, dim_a, *c );
}
*/

View File

@@ -40,23 +40,12 @@ void bli_packv_init
packv_t* cntl packv_t* cntl
); );
void bli_packv_init_pack siz_t bli_packv_init_pack
( (
pack_t pack_schema, pack_t pack_schema,
bszid_t bmult_id, bszid_t bmult_id,
obj_t* c, obj_t* a,
obj_t* p, obj_t* p,
cntx_t* cntx cntx_t* cntx
); );
void bli_packv_release
(
obj_t* p,
packv_t* cntl
);
/*
void bli_packv_init_cast( obj_t* a,
obj_t* p,
obj_t* c );
*/

View File

@@ -47,27 +47,23 @@ static FUNCPTR_T vars[1][3] =
{ bli_packv_unb_var1, NULL, NULL } { bli_packv_unb_var1, NULL, NULL }
}; };
void bli_packv_int( obj_t* a, void bli_packv_int
obj_t* p, (
cntx_t* cntx, obj_t* a,
packv_t* cntl ) obj_t* p,
cntx_t* cntx,
cntl_t* cntl
)
{ {
// The packv operation consists of an optional typecasting pre-process. #if 0
// Here are the following possible ways packv can execute:
// 1. cast and pack: When typecasting and packing are both
// precribed, typecast a to temporary vector c and then pack
// c to p.
// 2. pack only: Typecasting is skipped when it is not needed;
// simply pack a directly to p.
// 3. cast only: Not yet supported / not used.
// 4. no-op: The control tree sometimes directs us to skip the
// pack operation entirely. Alias p to a and return.
//obj_t c;
varnum_t n; varnum_t n;
impl_t i; impl_t i;
FUNCPTR_T f; #endif
packv_voft f;
// !!!
// DEFINE packv_voft type.
// !!!
// Check parameters. // Check parameters.
if ( bli_error_checking_is_enabled() ) if ( bli_error_checking_is_enabled() )

View File

@@ -36,6 +36,7 @@
#include "bli_l1m_check.h" #include "bli_l1m_check.h"
#include "bli_l1m_ft.h" #include "bli_l1m_ft.h"
#include "bli_l1m_voft.h"
// Prototype object APIs with and without contexts. // Prototype object APIs with and without contexts.
#include "bli_oapi_w_cntx.h" #include "bli_oapi_w_cntx.h"
@@ -51,6 +52,5 @@
#include "bli_unpackm.h" #include "bli_unpackm.h"
// Other // Other
#include "bli_scalm_cntl.h" #include "bli_scalm.h"
#include "bli_scalm_int.h"

75
frame/1m/bli_l1m_voft.h Normal file
View File

@@ -0,0 +1,75 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_L1M_VAR_OFT_H
#define BLIS_L1M_VAR_OFT_H
//
// -- Level-1m variant function types ------------------------------------------
//
// Function pointer type for packm variants. GENTDEF( packm ) below emits a
// typedef whose name is built by PASTECH from opname and the suffix _voft
// (presumably packm_voft -- PASTECH is defined elsewhere; confirm). A
// conforming function returns void and takes two objects, a and p, followed
// by a context, a control tree node, and a thrinfo_t pointer.
// NOTE(review): a is presumably the source object and p the packed
// destination; verify against the packm variants.
#undef GENTDEF
#define GENTDEF( opname ) \
\
typedef void (*PASTECH(opname,_voft)) \
( \
obj_t* a, \
obj_t* p, \
cntx_t* cntx, \
cntl_t* cntl, \
thrinfo_t* thread \
);
GENTDEF( packm )
// Function pointer type for unpackm variants. GENTDEF is redefined here
// because the operand order differs from the packm type: the object named p
// comes first and a second. GENTDEF( unpackm ) below emits a typedef whose
// name is built by PASTECH from opname and the suffix _voft (presumably
// unpackm_voft -- PASTECH is defined elsewhere; confirm). A conforming
// function returns void and takes p and a, followed by a context, a control
// tree node, and a thrinfo_t pointer.
// NOTE(review): p is presumably the packed source and a the destination;
// verify against the unpackm variants.
#undef GENTDEF
#define GENTDEF( opname ) \
\
typedef void (*PASTECH(opname,_voft)) \
( \
obj_t* p, \
obj_t* a, \
cntx_t* cntx, \
cntl_t* cntl, \
thrinfo_t* thread \
);
GENTDEF( unpackm )
#endif

View File

@@ -93,10 +93,14 @@ static func_t packm_struc_cxk_kers[BLIS_NUM_PACK_SCHEMA_TYPES] =
}; };
void bli_packm_blk_var1( obj_t* c, void bli_packm_blk_var1
obj_t* p, (
cntx_t* cntx, obj_t* c,
thrinfo_t* t ) obj_t* p,
cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* t
)
{ {
num_t dt_cp = bli_obj_datatype( *c ); num_t dt_cp = bli_obj_datatype( *c );
@@ -140,7 +144,7 @@ void bli_packm_blk_var1( obj_t* c,
// whether we are executing an induced method. // whether we are executing an induced method.
if ( bli_is_nat_packed( schema ) ) if ( bli_is_nat_packed( schema ) )
{ {
// This branch if for native execution, where we assume that // This branch is for native execution, where we assume that
// the micro-kernel will always apply the alpha scalar of the // the micro-kernel will always apply the alpha scalar of the
// higher-level operation. Thus, we use BLIS_ONE for kappa so // higher-level operation. Thus, we use BLIS_ONE for kappa so
// that the underlying packm implementation does not perform // that the underlying packm implementation does not perform
@@ -156,28 +160,25 @@ void bli_packm_blk_var1( obj_t* c,
// real domain micro-kernels. (In the aforementioned situation, // real domain micro-kernels. (In the aforementioned situation,
// applying a real scalar is easy, but applying a complex one is // applying a real scalar is easy, but applying a complex one is
// harder, so we avoid the need altogether with the code below.) // harder, so we avoid the need altogether with the code below.)
if( bli_thread_am_ochief( t ) ) if ( bli_obj_scalar_has_nonzero_imag( p ) )
{ {
if ( bli_obj_scalar_has_nonzero_imag( p ) ) //printf( "applying non-zero imag kappa\n" );
{
//printf( "applying non-zero imag kappa\n" ); // Detach the scalar.
// Detach the scalar. bli_obj_scalar_detach( p, &kappa );
bli_obj_scalar_detach( p, &kappa );
// Reset the attached scalar (to 1.0).
// Reset the attached scalar (to 1.0). bli_obj_scalar_reset( p );
bli_obj_scalar_reset( p );
kappa_p = &kappa;
kappa_p = &kappa; }
} else
else {
{ // If the internal scalar of A has only a real component, then
// If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will
// we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing.
// use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE;
kappa_p = &BLIS_ONE;
}
} }
kappa_p = bli_thread_obroadcast( t, kappa_p );
// Acquire the buffer to the kappa chosen above. // Acquire the buffer to the kappa chosen above.
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p ); buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
@@ -194,7 +195,12 @@ void bli_packm_blk_var1( obj_t* c,
bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers; bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers;
else packm_kers = packm_struc_cxk_kers; else packm_kers = packm_struc_cxk_kers;
#else #else
func_t* cntx_packm_kers = bli_cntx_get_packm_ukr( cntx ); // The original idea here was to read the packm_ukr from the context
// if it is non-NULL. The problem is, it requires that we be able to
// assume that the packm_ukr field is initialized to NULL, which it
// currently is not.
//func_t* cntx_packm_kers = bli_cntx_get_packm_ukr( cntx );
//if ( bli_func_is_null_dt( dt_cp, cntx_packm_kers ) ) //if ( bli_func_is_null_dt( dt_cp, cntx_packm_kers ) )
{ {
@@ -203,7 +209,6 @@ void bli_packm_blk_var1( obj_t* c,
// we use the default lookup table to determine the right func_t // we use the default lookup table to determine the right func_t
// for the current schema. // for the current schema.
const dim_t i = bli_pack_schema_index( schema ); const dim_t i = bli_pack_schema_index( schema );
//printf( "bli_packm_blk_var1: pack schema index = %lu (schema = %x)\n", i, schema );
packm_kers = &packm_struc_cxk_kers[ i ]; packm_kers = &packm_struc_cxk_kers[ i ];
} }
@@ -221,11 +226,6 @@ void bli_packm_blk_var1( obj_t* c,
// Query the datatype-specific function pointer from the func_t object. // Query the datatype-specific function pointer from the func_t object.
packm_ker = bli_func_get_dt( dt_cp, packm_kers ); packm_ker = bli_func_get_dt( dt_cp, packm_kers );
//bli_cntx_print( cntx );
//printf( "bli_packm_blk_var1: packm_ker = %p\n", packm_ker );
//printf( "bli_packm_blk_var1: cntx_packm_ker = %p\n", cntx_packm_kers );
//printf( "bli_packm_blk_var1: local_table_entry = %p\n", &packm_struc_cxk_kers[ bli_pack_schema_index( schema ) ] );
// Index into the type combination array to extract the correct // Index into the type combination array to extract the correct
// function pointer. // function pointer.
f = ftypes[dt_cp]; f = ftypes[dt_cp];
@@ -598,6 +598,57 @@ PASTEMAC(ch,fprintm)( stdout, "packm_var2: a", m, n, \
p_inc = ps_p; \ p_inc = ps_p; \
} \ } \
\ \
/*
if ( col_stored ) { \
if ( bli_thread_work_id( thread ) == 0 ) \
{ \
printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
fflush( stdout ); \
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \
} \
bli_thread_obarrier( thread ); \
if ( bli_thread_work_id( thread ) == 1 ) \
{ \
printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
fflush( stdout ); \
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \
} \
bli_thread_obarrier( thread ); \
} \
else { \
if ( bli_thread_work_id( thread ) == 0 ) \
{ \
printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
fflush( stdout ); \
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \
} \
bli_thread_obarrier( thread ); \
if ( bli_thread_work_id( thread ) == 1 ) \
{ \
printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
fflush( stdout ); \
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
fflush( stdout ); \
} \
bli_thread_obarrier( thread ); \
} \
*/ \
\
/* /*
if ( bli_is_4mi_packed( schema ) ) { \ if ( bli_is_4mi_packed( schema ) ) { \
printf( "packm_var2: is_p_use = %lu\n", is_p_use ); \ printf( "packm_var2: is_p_use = %lu\n", is_p_use ); \

View File

@@ -32,10 +32,14 @@
*/ */
void bli_packm_blk_var1( obj_t* c, void bli_packm_blk_var1
obj_t* p, (
cntx_t* cntx, obj_t* c,
thrinfo_t* t ); obj_t* p,
cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* t
);
#undef GENTPROT #undef GENTPROT

View File

@@ -35,9 +35,12 @@
#include "blis.h" #include "blis.h"
void bli_packm_init_check( obj_t* a, void bli_packm_init_check
obj_t* p, (
cntx_t* cntx ) obj_t* a,
obj_t* p,
cntx_t* cntx
)
{ {
err_t e_val; err_t e_val;
@@ -54,9 +57,12 @@ void bli_packm_init_check( obj_t* a,
//bli_check_error_code( e_val ); //bli_check_error_code( e_val );
} }
void bli_packm_int_check( obj_t* a, void bli_packm_int_check
obj_t* p, (
cntx_t* cntx ) obj_t* a,
obj_t* p,
cntx_t* cntx
)
{ {
err_t e_val; err_t e_val;

View File

@@ -32,10 +32,17 @@
*/ */
void bli_packm_init_check( obj_t* a, void bli_packm_init_check
obj_t* p, (
cntx_t* cntx ); obj_t* a,
obj_t* p,
cntx_t* cntx
);
void bli_packm_int_check
(
obj_t* a,
obj_t* p,
cntx_t* cntx
);
void bli_packm_int_check( obj_t* a,
obj_t* p,
cntx_t* cntx );

View File

@@ -34,109 +34,49 @@
#include "blis.h" #include "blis.h"
packm_t* packm_cntl_row = NULL; cntl_t* bli_packm_cntl_obj_create
packm_t* packm_cntl_col = NULL; (
void* var_func,
packm_t* packm_cntl = NULL; void* packm_var_func,
bszid_t bmid_m,
void bli_packm_cntl_init() bszid_t bmid_n,
bool_t does_invert_diag,
bool_t rev_iter_if_upper,
bool_t rev_iter_if_lower,
pack_t pack_schema,
packbuf_t pack_buf_type,
cntl_t* sub_node
)
{ {
// Generally speaking, the BLIS_PACKED_ROWS and BLIS_PACKED_COLUMNS cntl_t* cntl;
// are used by the level-2 operations. These schemas amount to simple packm_params_t* params;
// copies to row or column storage. These simple schemas may be used
// by level-3 operations, but they should never be used for matrices
// with structure (since they do not densify).
// The BLIS_PACKED_ROW_PANELS and BLIS_PACKED_COL_PANELS schemas are
// used only in level-3 operations. They pack to (typically) skinny
// row and column panels, where the width of the panel is determined
// by register blocksizes. It is assumed that matrices with structure
// will be densified.
// Create control trees to pack by rows. // Allocate a packm_params_t struct.
packm_cntl_row params = bli_malloc_intl( sizeof( packm_params_t ) );
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1, // When packing to rows:
BLIS_VF, // used for m dimension
BLIS_VF, // used for n dimension
FALSE, // do NOT invert diagonal
FALSE, // do NOT iterate backwards if upper
FALSE, // do NOT iterate backwards if lower
BLIS_PACKED_ROWS,
BLIS_BUFFER_FOR_GEN_USE );
// Initialize the packm_params_t struct.
params->size = sizeof( packm_params_t );
params->var_func = packm_var_func;
params->bmid_m = bmid_m;
params->bmid_n = bmid_n;
params->does_invert_diag = does_invert_diag;
params->rev_iter_if_upper = rev_iter_if_upper;
params->rev_iter_if_lower = rev_iter_if_lower;
params->pack_schema = pack_schema;
params->pack_buf_type = pack_buf_type;
// Create control trees to pack by columns. // It's important that we set the bszid field to BLIS_NO_PART to indicate
packm_cntl_col // that no blocksize partitioning is performed. bli_cntl_free() will rely
= // on this information to know how to step through the thrinfo_t tree in
bli_packm_cntl_obj_create( BLIS_UNBLOCKED, // sync with the cntl_t tree.
BLIS_VARIANT1, // When packing to columns: cntl = bli_cntl_obj_create
BLIS_VF, // used for m dimension (
BLIS_VF, // used for n dimension BLIS_NO_PART,
FALSE, // do NOT invert diagonal var_func,
FALSE, // do NOT iterate backwards if upper params,
FALSE, // do NOT iterate backwards if lower sub_node
BLIS_PACKED_COLUMNS, );
BLIS_BUFFER_FOR_GEN_USE );
// Set defaults when we don't care whether the packing is by rows or
// by columns.
packm_cntl = packm_cntl_col;
}
void bli_packm_cntl_finalize()
{
bli_cntl_obj_free( packm_cntl_row );
bli_cntl_obj_free( packm_cntl_col );
}
packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
varnum_t var_num,
bszid_t bmid_m,
bszid_t bmid_n,
bool_t does_invert_diag,
bool_t rev_iter_if_upper,
bool_t rev_iter_if_lower,
pack_t pack_schema,
packbuf_t pack_buf_type )
{
packm_t* cntl;
cntl = ( packm_t* ) bli_malloc_intl( sizeof(packm_t) );
cntl->impl_type = impl_type;
cntl->var_num = var_num;
cntl->bmid_m = bmid_m;
cntl->bmid_n = bmid_n;
cntl->does_invert_diag = does_invert_diag;
cntl->rev_iter_if_upper = rev_iter_if_upper;
cntl->rev_iter_if_lower = rev_iter_if_lower;
cntl->pack_schema = pack_schema;
cntl->pack_buf_type = pack_buf_type;
return cntl; return cntl;
} }
void bli_packm_cntl_obj_init( packm_t* cntl,
impl_t impl_type,
varnum_t var_num,
bszid_t bmid_m,
bszid_t bmid_n,
bool_t does_invert_diag,
bool_t rev_iter_if_upper,
bool_t rev_iter_if_lower,
pack_t pack_schema,
packbuf_t pack_buf_type )
{
cntl->impl_type = impl_type;
cntl->var_num = var_num;
cntl->bmid_m = bmid_m;
cntl->bmid_n = bmid_n;
cntl->does_invert_diag = does_invert_diag;
cntl->rev_iter_if_upper = rev_iter_if_upper;
cntl->rev_iter_if_lower = rev_iter_if_lower;
cntl->pack_schema = pack_schema;
cntl->pack_buf_type = pack_buf_type;
}

View File

@@ -32,56 +32,65 @@
*/ */
struct packm_s struct packm_params_s
{ {
impl_t impl_type; uint64_t size; // size field must be present and come first.
varnum_t var_num; packm_voft var_func;
bszid_t bmid_m; bszid_t bmid_m;
bszid_t bmid_n; bszid_t bmid_n;
bool_t does_invert_diag; bool_t does_invert_diag;
bool_t rev_iter_if_upper; bool_t rev_iter_if_upper;
bool_t rev_iter_if_lower; bool_t rev_iter_if_lower;
pack_t pack_schema; pack_t pack_schema;
packbuf_t pack_buf_type; packbuf_t pack_buf_type;
}; };
typedef struct packm_s packm_t; typedef struct packm_params_s packm_params_t;
#define cntl_bmid_m( cntl ) cntl->bmid_m #define bli_cntl_packm_params_var_func( cntl ) \
#define cntl_bmid_n( cntl ) cntl->bmid_n \
( ( (packm_params_t*)(cntl)->params )->var_func )
#define cntl_does_invert_diag( cntl ) cntl->does_invert_diag #define bli_cntl_packm_params_bmid_m( cntl ) \
#define cntl_rev_iter_if_upper( cntl ) cntl->rev_iter_if_upper \
#define cntl_rev_iter_if_lower( cntl ) cntl->rev_iter_if_lower ( ( (packm_params_t*)(cntl)->params )->bmid_m )
#define cntl_pack_schema( cntl ) cntl->pack_schema
#define cntl_pack_buf_type( cntl ) cntl->pack_buf_type
#define bli_cntl_sub_packm( cntl ) cntl->sub_packm #define bli_cntl_packm_params_bmid_n( cntl ) \
#define bli_cntl_sub_packm_a( cntl ) cntl->sub_packm_a \
#define bli_cntl_sub_packm_a11( cntl ) cntl->sub_packm_a11 ( ( (packm_params_t*)(cntl)->params )->bmid_n )
#define bli_cntl_sub_packm_b( cntl ) cntl->sub_packm_b
#define bli_cntl_sub_packm_b11( cntl ) cntl->sub_packm_b11
#define bli_cntl_sub_packm_c( cntl ) cntl->sub_packm_c
#define bli_cntl_sub_packm_c11( cntl ) cntl->sub_packm_c11
void bli_packm_cntl_init( void ); #define bli_cntl_packm_params_does_invert_diag( cntl ) \
void bli_packm_cntl_finalize( void ); \
packm_t* bli_packm_cntl_obj_create( impl_t impl_type, ( ( (packm_params_t*)(cntl)->params )->does_invert_diag )
varnum_t var_num,
bszid_t bmid_m, #define bli_cntl_packm_params_rev_iter_if_upper( cntl ) \
bszid_t bmid_n, \
bool_t does_invert_diag, ( ( (packm_params_t*)(cntl)->params )->rev_iter_if_upper )
bool_t rev_iter_if_upper,
bool_t rev_iter_if_lower, #define bli_cntl_packm_params_rev_iter_if_lower( cntl ) \
pack_t pack_schema, \
packbuf_t pack_buf_type ); ( ( (packm_params_t*)(cntl)->params )->rev_iter_if_lower )
void bli_packm_cntl_obj_init( packm_t* cntl,
impl_t impl_type, #define bli_cntl_packm_params_pack_schema( cntl ) \
varnum_t var_num, \
bszid_t bmid_m, ( ( (packm_params_t*)(cntl)->params )->pack_schema )
bszid_t bmid_n,
bool_t does_invert_diag, #define bli_cntl_packm_params_pack_buf_type( cntl ) \
bool_t rev_iter_if_upper, \
bool_t rev_iter_if_lower, ( ( (packm_params_t*)(cntl)->params )->pack_buf_type )
pack_t pack_schema,
packbuf_t pack_buf_type ); // -----------------------------------------------------------------------------
cntl_t* bli_packm_cntl_obj_create
(
void* var_func,
void* packm_var_func,
bszid_t bmid_m,
bszid_t bmid_n,
bool_t does_invert_diag,
bool_t rev_iter_if_upper,
bool_t rev_iter_if_lower,
pack_t pack_schema,
packbuf_t pack_buf_type,
cntl_t* sub_node
);

View File

@@ -52,7 +52,7 @@ void bli_packm_cntx_init( cntx_t* cntx )
bli_gks_cntx_set_l1v_ker( BLIS_SETV_KER, cntx ); bli_gks_cntx_set_l1v_ker( BLIS_SETV_KER, cntx );
// Initialize the context with the global membrk object. // Initialize the context with the global membrk object.
bli_cntx_set_membrk( bli_mem_global_membrk(), cntx ); bli_cntx_set_membrk( bli_memsys_global_membrk(), cntx );
} }
void bli_packm_cntx_finalize( cntx_t* cntx ) void bli_packm_cntx_finalize( cntx_t* cntx )

View File

@@ -35,38 +35,43 @@
#include "blis.h" #include "blis.h"
void bli_packm_init( obj_t* a, siz_t bli_packm_init
obj_t* p, (
cntx_t* cntx, obj_t* a,
packm_t* cntl ) obj_t* p,
cntx_t* cntx,
cntl_t* cntl
)
{ {
// The purpose of packm_init() is to initialize an object P so that // The purpose of packm_init() is to initialize an object P so that
// a source object A can be packed into P via one of the packm // a source object A can be packed into P via one of the packm
// implementations. This initialization includes acquiring a suitable // implementations. This initialization precedes the acquisition of a
// block of memory from the memory allocator, if such a block of memory // suitable block of memory from the memory allocator (if such a block
// has not already been allocated previously. // of memory has not already been allocated previously).
invdiag_t invert_diag;
pack_t schema;
packord_t pack_ord_if_up;
packord_t pack_ord_if_lo;
packbuf_t pack_buf_type;
bszid_t bmult_id_m; bszid_t bmult_id_m;
bszid_t bmult_id_n; bszid_t bmult_id_n;
obj_t c; bool_t does_invert_diag;
bool_t rev_iter_if_upper;
bool_t rev_iter_if_lower;
//pack_t pack_schema;
packbuf_t pack_buf_type;
siz_t size_needed;
// Check parameters. // Check parameters.
if ( bli_error_checking_is_enabled() ) if ( bli_error_checking_is_enabled() )
bli_packm_init_check( a, p, cntx ); bli_packm_init_check( a, p, cntx );
// First check if we are to skip this operation because the control tree // Extract various fields from the control tree.
// is NULL, and if so, simply alias the object to its packed counterpart. bmult_id_m = bli_cntl_packm_params_bmid_m( cntl );
if ( bli_cntl_is_noop( cntl ) ) bmult_id_n = bli_cntl_packm_params_bmid_n( cntl );
{ does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl );
bli_obj_alias_to( *a, *p ); rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl );
return; rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl );
} //pack_schema = bli_cntl_packm_params_pack_schema( cntl );
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
#if 0
// Let us now check to see if the object has already been packed. First // Let us now check to see if the object has already been packed. First
// we check if it has been packed to an unspecified (row or column) // we check if it has been packed to an unspecified (row or column)
// format, in which case we can alias the object and return. // format, in which case we can alias the object and return.
@@ -79,179 +84,150 @@ void bli_packm_init( obj_t* a,
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC ) if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
{ {
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( *a, *p );
return; return 0;
} }
// At this point, we can be assured that cntl is not NULL. Now we check // Now we check if the object has already been packed to the desired
// if the object has already been packed to the desired schema (as en- // schema (as encoded in the control tree). If so, we can alias and
// coded in the control tree). If so, we can alias and return, as above. // return 0.
// NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED // NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED
// and thus packing will be called for (but in some cases packing has // and thus packing will be called for (but in some cases packing has
// already taken place, or does not need to take place, and so that will // already taken place, or does not need to take place, and so that will
// be indicated by the pack status). Also, not all combinations of // be indicated by the pack status). Also, not all combinations of
// current pack status and desired pack schema are valid. // current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) ) if ( bli_obj_pack_schema( *a ) == pack_schema )
{ {
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( *a, *p );
return; return 0;
} }
#endif
// If the object is marked as being filled with zeros, then we can skip // If the object is marked as being filled with zeros, then we can skip
// the packm operation entirely and alias. Notice that we use pack-aware // the packm operation entirely and alias.
// aliasing. This is needed because the object may have been packed in
// a previous iteration, which means the object currently contains the
// mem_t entry of an already-allocated block. bli_obj_alias_for_packing()
// will avoid overwriting that mem_t entry, which means it can be
// properly released later on.
if ( bli_obj_is_zeros( *a ) ) if ( bli_obj_is_zeros( *a ) )
{ {
bli_obj_alias_for_packing( *a, *p ); bli_obj_alias_to( *a, *p );
return; return 0;
} }
// Now, if we are not skipping the pack operation, then the only question // We now ignore the pack_schema field in the control tree and
// left is whether we are to typecast matrix a before packing. // extract the schema from the context, depending on whether we are
if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
bli_abort();
/*
{
// Initialize an object c for the intermediate typecast matrix.
bli_packm_init_cast( a,
p,
&c );
// Copy/typecast matrix a to matrix c.
bli_copym( a,
&c );
}
else
*/
{
// If no cast is needed, then aliasing object c to the original
// matrix serves as a minor optimization. This causes the packm
// implementation to pack directly from matrix a.
bli_obj_alias_to( *a, c );
}
// Extract various fields from the control tree.
pack_buf_type = cntl_pack_buf_type( cntl );
bmult_id_m = cntl_bmid_m( cntl );
bmult_id_n = cntl_bmid_n( cntl );
// Extract the schema from the context, depending on whether we are
// preparing to pack a block of A or panel of B. For A and B, we must // preparing to pack a block of A or panel of B. For A and B, we must
// obtain the schema from the context since the induced methods reuse // obtain the schema from the context since the induced methods reuse
// the same control trees used by native execution, and those induced // the same control trees used by native execution, and those induced
// methods specify the schema used by the current execution phase // methods specify the schema used by the current execution phase
// within the context (whereas the control tree does not change). // within the context (whereas the control tree does not change).
pack_t schema;
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK ) if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
{ {
schema = bli_cntx_get_pack_schema_a( cntx ); schema = bli_cntx_get_pack_schema_a( cntx );
//printf( "bli_packm_init: pack schema a = %x\n", schema );
} }
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL ) else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
{ {
schema = bli_cntx_get_pack_schema_b( cntx ); schema = bli_cntx_get_pack_schema_b( cntx );
//printf( "bli_packm_init: pack schema b = %x\n", schema );
} }
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
{ {
// If we get a request to pack C for some reason, it is likely // If we get a request to pack C for some reason, it is likely
// not part of an induced method, and so it would be safe (and // not part of an induced method, and so it would be safe (and
// necessary) to read the pack schema from the control tree. // necessary) to read the pack schema from the control tree.
schema = cntl_pack_schema( cntl ); schema = bli_cntl_packm_params_pack_schema( cntl );
//printf( "bli_packm_init: pack schema c = %x\n", schema );
} }
// Prepare a few other variables based on properties of the control // Prepare a few other variables based on properties of the control
// tree. // tree.
if ( cntl_does_invert_diag( cntl ) ) invert_diag = BLIS_INVERT_DIAG; invdiag_t invert_diag;
else invert_diag = BLIS_NO_INVERT_DIAG; packord_t pack_ord_if_up;
packord_t pack_ord_if_lo;
if ( cntl_rev_iter_if_upper( cntl ) ) pack_ord_if_up = BLIS_PACK_REV_IF_UPPER; if ( does_invert_diag ) invert_diag = BLIS_INVERT_DIAG;
else pack_ord_if_up = BLIS_PACK_FWD_IF_UPPER; else invert_diag = BLIS_NO_INVERT_DIAG;
if ( cntl_rev_iter_if_lower( cntl ) ) pack_ord_if_lo = BLIS_PACK_REV_IF_LOWER; if ( rev_iter_if_upper ) pack_ord_if_up = BLIS_PACK_REV_IF_UPPER;
else pack_ord_if_lo = BLIS_PACK_FWD_IF_LOWER; else pack_ord_if_up = BLIS_PACK_FWD_IF_UPPER;
if ( rev_iter_if_lower ) pack_ord_if_lo = BLIS_PACK_REV_IF_LOWER;
else pack_ord_if_lo = BLIS_PACK_FWD_IF_LOWER;
// Initialize object p for the final packed matrix. // Initialize object p for the final packed matrix.
bli_packm_init_pack( invert_diag, size_needed
schema, =
pack_ord_if_up, bli_packm_init_pack
pack_ord_if_lo, (
pack_buf_type, invert_diag,
bmult_id_m, schema,
bmult_id_n, pack_ord_if_up,
&c, pack_ord_if_lo,
p, bmult_id_m,
cntx ); bmult_id_n,
a,
p,
cntx
);
// Now p is ready to be packed. // Return the size needed for memory allocation of the packed buffer.
return size_needed;
} }
void bli_packm_init_pack( invdiag_t invert_diag, siz_t bli_packm_init_pack
pack_t schema, (
packord_t pack_ord_if_up, invdiag_t invert_diag,
packord_t pack_ord_if_lo, pack_t schema,
packbuf_t pack_buf_type, packord_t pack_ord_if_up,
bszid_t bmult_id_m, packord_t pack_ord_if_lo,
bszid_t bmult_id_n, bszid_t bmult_id_m,
obj_t* c, bszid_t bmult_id_n,
obj_t* p, obj_t* a,
cntx_t* cntx ) obj_t* p,
cntx_t* cntx
)
{ {
num_t dt = bli_obj_datatype( *c ); num_t dt = bli_obj_datatype( *a );
trans_t transc = bli_obj_onlytrans_status( *c ); trans_t transa = bli_obj_onlytrans_status( *a );
dim_t m_c = bli_obj_length( *c ); dim_t m_a = bli_obj_length( *a );
dim_t n_c = bli_obj_width( *c ); dim_t n_a = bli_obj_width( *a );
dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx ); dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx );
dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx ); dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx );
dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx ); dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx );
dim_t bmult_n_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_n, cntx ); dim_t bmult_n_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_n, cntx );
membrk_t* membrk = bli_cntx_get_membrk( cntx );
mem_t* mem_p;
dim_t m_p, n_p; dim_t m_p, n_p;
dim_t m_p_pad, n_p_pad; dim_t m_p_pad, n_p_pad;
siz_t size_p; siz_t size_p;
siz_t elem_size_p; siz_t elem_size_p;
inc_t rs_p, cs_p; inc_t rs_p, cs_p;
inc_t is_p; inc_t is_p;
void* buf;
// We begin by copying the basic fields of c. We do NOT copy the // We begin by copying the fields of A.
// pack_mem entry from c because the entry in p may be cached from bli_obj_alias_to( *a, *p );
// a previous iteration, and thus we don't want to overwrite it.
bli_obj_alias_for_packing( *c, *p );
// Update the dimension fields to explicitly reflect a transposition, // Update the dimension fields to explicitly reflect a transposition,
// if needed. // if needed.
// Then, clear the conjugation and transposition fields from the object // Then, clear the conjugation and transposition fields from the object
// since matrix packing in BLIS is deemed to take care of all conjugation // since matrix packing in BLIS is deemed to take care of all conjugation
// and transposition necessary. // and transposition necessary.
// Then, we adjust the properties of p when c needs a transposition. // Then, we adjust the properties of P when A needs a transposition.
// We negate the diagonal offset, and if c is upper- or lower-stored, // We negate the diagonal offset, and if A is upper- or lower-stored,
// we toggle the uplo of p. // we toggle the uplo of P.
// Finally, we mark p as dense since we assume that all matrices, // Finally, we mark P as dense since we assume that all matrices,
// regardless of structure, will be densified. // regardless of structure, will be densified.
bli_obj_set_dims_with_trans( transc, m_c, n_c, *p ); bli_obj_set_dims_with_trans( transa, m_a, n_a, *p );
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p ); bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p );
if ( bli_does_trans( transc ) ) if ( bli_does_trans( transa ) )
{ {
bli_obj_negate_diag_offset( *p ); bli_obj_negate_diag_offset( *p );
if ( bli_obj_is_upper_or_lower( *c ) ) if ( bli_obj_is_upper_or_lower( *a ) )
bli_obj_toggle_uplo( *p ); bli_obj_toggle_uplo( *p );
} }
// If we are packing micro-panels, mark p as dense. Otherwise, we are // If we are packing micro-panels, mark P as dense. Otherwise, we are
// probably being called in the context of a level-2 operation, in // probably being called in the context of a level-2 operation, in
// which case we do not want to overwrite the uplo field of p (inherited // which case we do not want to overwrite the uplo field of P (inherited
// from c) with BLIS_DENSE because that information may be needed by // from A) with BLIS_DENSE because that information may be needed by
// the level-2 operation's unblocked variant to decide whether to // the level-2 operation's unblocked variant to decide whether to
// execute a "lower" or "upper" branch of code. // execute a "lower" or "upper" branch of code.
if ( bli_is_panel_packed( schema ) ) if ( bli_is_panel_packed( schema ) )
@@ -265,7 +241,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
// Set the invert diagonal field. // Set the invert diagonal field.
bli_obj_set_invert_diag( invert_diag, *p ); bli_obj_set_invert_diag( invert_diag, *p );
// Set the pack status of p to the pack schema prescribed in the control // Set the pack status of P to the pack schema prescribed in the control
// tree node. // tree node.
bli_obj_set_pack_schema( schema, *p ); bli_obj_set_pack_schema( schema, *p );
@@ -273,15 +249,11 @@ void bli_packm_init_pack( invdiag_t invert_diag,
bli_obj_set_pack_order_if_upper( pack_ord_if_up, *p ); bli_obj_set_pack_order_if_upper( pack_ord_if_up, *p );
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, *p ); bli_obj_set_pack_order_if_lower( pack_ord_if_lo, *p );
// Extract the address of the mem_t object within p that will track
// properties of the packed buffer.
mem_p = bli_obj_pack_mem( *p );
// Compute the dimensions padded by the dimension multiples. These // Compute the dimensions padded by the dimension multiples. These
// dimensions will be the dimensions of the packed matrices, including // dimensions will be the dimensions of the packed matrices, including
// zero-padding, and will be used by the macro- and micro-kernels. // zero-padding, and will be used by the macro- and micro-kernels.
// We compute them by starting with the effective dimensions of c (now // We compute them by starting with the effective dimensions of A (now
// in p) and aligning them to the dimension multiples (typically equal // in P) and aligning them to the dimension multiples (typically equal
// to register blocksizes). This does waste a little bit of space for // to register blocksizes). This does waste a little bit of space for
// level-2 operations, but that's okay with us. // level-2 operations, but that's okay with us.
m_p = bli_obj_length( *p ); m_p = bli_obj_length( *p );
@@ -295,9 +267,9 @@ void bli_packm_init_pack( invdiag_t invert_diag,
bli_obj_set_padded_dims( m_p_pad, n_p_pad, *p ); bli_obj_set_padded_dims( m_p_pad, n_p_pad, *p );
// Now we prepare to compute strides, align them, and compute the // Now we prepare to compute strides, align them, and compute the
// total number of bytes needed for the packed buffer. After that, // total number of bytes needed for the packed buffer. The caller
// we will acquire an appropriate block of memory from the memory // will then use that value to acquire an appropriate block of memory
// allocator. // from the memory allocator.
// Extract the element size for the packed object. // Extract the element size for the packed object.
elem_size_p = bli_obj_elem_size( *p ); elem_size_p = bli_obj_elem_size( *p );
@@ -320,7 +292,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
rs_p = bli_align_dim_to_size( rs_p, elem_size_p, rs_p = bli_align_dim_to_size( rs_p, elem_size_p,
BLIS_HEAP_STRIDE_ALIGN_SIZE ); BLIS_HEAP_STRIDE_ALIGN_SIZE );
// Store the strides in p. // Store the strides in P.
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, *p );
// Compute the size of the packed buffer. // Compute the size of the packed buffer.
@@ -343,7 +315,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
cs_p = bli_align_dim_to_size( cs_p, elem_size_p, cs_p = bli_align_dim_to_size( cs_p, elem_size_p,
BLIS_HEAP_STRIDE_ALIGN_SIZE ); BLIS_HEAP_STRIDE_ALIGN_SIZE );
// Store the strides in p. // Store the strides in P.
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, *p );
// Compute the size of the packed buffer. // Compute the size of the packed buffer.
@@ -431,7 +403,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( m_p_pad / m_panel ); else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( m_p_pad / m_panel );
else is_p = 1; else is_p = 1;
// Store the strides and panel dimension in p. // Store the strides and panel dimension in P.
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, *p );
bli_obj_set_imag_stride( is_p, *p ); bli_obj_set_imag_stride( is_p, *p );
bli_obj_set_panel_dim( m_panel, *p ); bli_obj_set_panel_dim( m_panel, *p );
@@ -524,7 +496,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( n_p_pad / n_panel ); else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( n_p_pad / n_panel );
else is_p = 1; else is_p = 1;
// Store the strides and panel dimension in p. // Store the strides and panel dimension in P.
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, *p );
bli_obj_set_imag_stride( is_p, *p ); bli_obj_set_imag_stride( is_p, *p );
bli_obj_set_panel_dim( n_panel, *p ); bli_obj_set_panel_dim( n_panel, *p );
@@ -547,99 +519,6 @@ void bli_packm_init_pack( invdiag_t invert_diag,
size_p = 0; size_p = 0;
} }
return size_p;
if ( bli_mem_is_unalloc( mem_p ) )
{
// If the mem_t object of p has not yet been allocated, then acquire
// a memory block of type pack_buf_type.
bli_membrk_acquire_m( membrk,
size_p,
pack_buf_type,
mem_p );
}
else
{
// If the mem_t object is currently allocated and smaller than is
// needed, then it must have been allocated for a different type
// of object (a different pack_buf_type value), so we must first
// release it and then re-acquire it using the new size and new
// pack_buf_type value.
if ( bli_mem_size( mem_p ) < size_p )
{
bli_membrk_release( mem_p );
bli_membrk_acquire_m( membrk,
size_p,
pack_buf_type,
mem_p );
}
}
// Grab the buffer address from the mem_t object and copy it to the
// main object buffer field. (Sometimes this buffer address will be
// copied when the value is already up-to-date, because it persists
// in the main object buffer field across loop iterations.)
buf = bli_mem_buffer( mem_p );
bli_obj_set_buffer( buf, *p );
} }
void bli_packm_release( obj_t* p,
packm_t* cntl )
{
if ( !bli_cntl_is_noop( cntl ) )
bli_obj_release_pack( p );
}
/*
void bli_packm_init_cast( obj_t* a,
obj_t* p,
obj_t* c )
{
// The idea here is that we want to create an object c that is identical
// to object a, except that:
// (1) the storage datatype of c is equal to the target datatype of a,
// with the element size of c adjusted accordingly,
// (2) the view offset of c is reset to (0,0),
// (3) object c's main buffer is set to a new memory region acquired
// from the memory manager, or extracted from p if a mem entry is
// already available, (After acquring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
// (4) object c is marked as being stored in a standard, contiguous
// format (ie: a column-major order).
// Any transposition encoded within object a will not be handled here,
// but rather will be handled in the packm implementation. That way,
// the only thing castm needs to do is cast.
num_t dt_targ_a = bli_obj_target_datatype( *a );
dim_t m_a = bli_obj_length( *a );
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
inc_t rs_c, cs_c;
// We begin by copying the basic fields of a.
bli_obj_alias_to( *a, *c );
// Update datatype and element size fields.
bli_obj_set_datatype( dt_targ_a, *c );
bli_obj_set_elem_size( elem_size_c, *c );
// Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *c );
// Check the mem_t entry of p associated with the cast buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache
// it to p. (Otherwise, if it is non-NULL, then memory has already been
// acquired from the memory manager and cached.) We then set the main
// buffer of c to the cached address of the cast memory.
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
// Update the strides. We set the increments to reflect column-major order
// storage. We start the leading dimension out as m(a) and increment it if
// necessary so that the beginning of each column is aligned.
cs_c = bli_align_dim_to_size( m_a, elem_size_c,
BLIS_HEAP_STRIDE_ALIGN_SIZE );
rs_c = 1;
bli_obj_set_strides( rs_c, cs_c, *c );
}
*/

View File

@@ -32,28 +32,24 @@
*/ */
void bli_packm_init( obj_t* a, siz_t bli_packm_init
obj_t* p, (
cntx_t* cntx, obj_t* a,
packm_t* cntl ); obj_t* p,
cntx_t* cntx,
cntl_t* cntl
);
void bli_packm_init_pack( invdiag_t invert_diag, siz_t bli_packm_init_pack
pack_t pack_schema, (
packord_t pack_ord_if_up, invdiag_t invert_diag,
packord_t pack_ord_if_lo, pack_t schema,
packbuf_t pack_buf_type, packord_t pack_ord_if_up,
bszid_t mr_id, packord_t pack_ord_if_lo,
bszid_t nr_id, bszid_t bmult_id_m,
obj_t* c, bszid_t bmult_id_n,
obj_t* p, obj_t* a,
cntx_t* cntx ); obj_t* p,
cntx_t* cntx
/* );
void bli_packm_init_cast( obj_t* a,
obj_t* p,
obj_t* c );
*/
void bli_packm_release( obj_t* p,
packm_t* cntl );

View File

@@ -34,33 +34,16 @@
#include "blis.h" #include "blis.h"
#define FUNCPTR_T packm_fp void bli_packm_int
(
typedef void (*FUNCPTR_T)( obj_t* a, obj_t* a,
obj_t* p, obj_t* p,
cntx_t* cntx, cntx_t* cntx,
thrinfo_t* t ); cntl_t* cntl,
thrinfo_t* thread
static FUNCPTR_T vars[6][3] = )
{ {
// unblocked optimized unblocked blocked packm_voft f;
{ bli_packm_unb_var1, NULL, bli_packm_blk_var1 },
{ NULL, NULL, NULL, },
{ NULL, NULL, NULL, },
{ NULL, NULL, NULL, },
{ NULL, NULL, NULL, },
{ NULL, NULL, NULL, },
};
void bli_packm_int( obj_t* a,
obj_t* p,
cntx_t* cntx,
packm_t* cntl,
thrinfo_t* thread )
{
varnum_t n;
impl_t i;
FUNCPTR_T f;
// Check parameters. // Check parameters.
if ( bli_error_checking_is_enabled() ) if ( bli_error_checking_is_enabled() )
@@ -70,14 +53,6 @@ void bli_packm_int( obj_t* a,
// it, then we should fold it into the next alias-and-early-exit block. // it, then we should fold it into the next alias-and-early-exit block.
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort(); //if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
// First check if we are to skip this operation because the control tree
// is NULL. We return without taking any action because a was already
// aliased to p in packm_init().
if ( bli_cntl_is_noop( cntl ) )
{
return;
}
// Let us now check to see if the object has already been packed. First // Let us now check to see if the object has already been packed. First
// we check if it has been packed to an unspecified (row or column) // we check if it has been packed to an unspecified (row or column)
// format, in which case we can return, since by now aliasing has already // format, in which case we can return, since by now aliasing has already
@@ -101,7 +76,7 @@ void bli_packm_int( obj_t* a,
// already taken place, or does not need to take place, and so that will // already taken place, or does not need to take place, and so that will
// be indicated by the pack status). Also, not all combinations of // be indicated by the pack status). Also, not all combinations of
// current pack status and desired pack schema are valid. // current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) ) if ( bli_obj_pack_schema( *a ) == bli_cntl_packm_params_pack_schema( cntl ) )
{ {
return; return;
} }
@@ -113,21 +88,17 @@ void bli_packm_int( obj_t* a,
return; return;
} }
// Extract the function pointer from the current control tree node.
// Extract the variant number and implementation type. f = bli_cntl_packm_params_var_func( cntl );
n = bli_cntl_var_num( cntl );
i = bli_cntl_impl_type( cntl );
// Index into the variant array to extract the correct function pointer.
f = vars[n][i];
// Invoke the variant with kappa_use. // Invoke the variant with kappa_use.
f( a, f
p, (
cntx, a,
thread ); p,
cntx,
// Barrier so that packing is done before computation cntl,
bli_thread_obarrier( thread ); thread
);
} }

View File

@@ -32,9 +32,11 @@
*/ */
void bli_packm_int( obj_t* a, void bli_packm_int
obj_t* p, (
cntx_t* cntx, obj_t* a,
packm_t* cntl, obj_t* p,
thrinfo_t* thread ); cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* thread
);

View File

@@ -41,7 +41,8 @@ thrinfo_t* bli_packm_thrinfo_create
thrcomm_t* icomm, thrcomm_t* icomm,
dim_t icomm_id, dim_t icomm_id,
dim_t n_way, dim_t n_way,
dim_t work_id dim_t work_id,
thrinfo_t* sub_node
) )
{ {
thrinfo_t* thread = bli_malloc_intl( sizeof( thrinfo_t ) ); thrinfo_t* thread = bli_malloc_intl( sizeof( thrinfo_t ) );
@@ -53,9 +54,8 @@ thrinfo_t* bli_packm_thrinfo_create
icomm, icomm_id, icomm, icomm_id,
n_way, n_way,
work_id, work_id,
NULL, FALSE,
NULL, sub_node
NULL
); );
return thread; return thread;
@@ -69,7 +69,8 @@ void bli_packm_thrinfo_init
thrcomm_t* icomm, thrcomm_t* icomm,
dim_t icomm_id, dim_t icomm_id,
dim_t n_way, dim_t n_way,
dim_t work_id dim_t work_id,
thrinfo_t* sub_node
) )
{ {
bli_thrinfo_init bli_thrinfo_init
@@ -78,9 +79,8 @@ void bli_packm_thrinfo_init
ocomm, ocomm_id, ocomm, ocomm_id,
icomm, icomm_id, icomm, icomm_id,
n_way, work_id, n_way, work_id,
NULL, FALSE,
NULL, sub_node
NULL
); );
} }
@@ -95,7 +95,8 @@ void bli_packm_thrinfo_init_single
&BLIS_SINGLE_COMM, 0, &BLIS_SINGLE_COMM, 0,
&BLIS_SINGLE_COMM, 0, &BLIS_SINGLE_COMM, 0,
1, 1,
0 0,
NULL
); );
} }

View File

@@ -49,7 +49,8 @@ thrinfo_t* bli_packm_thrinfo_create
thrcomm_t* icomm, thrcomm_t* icomm,
dim_t icomm_id, dim_t icomm_id,
dim_t n_way, dim_t n_way,
dim_t work_id dim_t work_id,
thrinfo_t* sub_node
); );
void bli_packm_thrinfo_init void bli_packm_thrinfo_init
@@ -60,7 +61,8 @@ void bli_packm_thrinfo_init
thrcomm_t* icomm, thrcomm_t* icomm,
dim_t icomm_id, dim_t icomm_id,
dim_t n_way, dim_t n_way,
dim_t work_id dim_t work_id,
thrinfo_t* sub_node
); );
void bli_packm_thrinfo_init_single void bli_packm_thrinfo_init_single

View File

@@ -55,10 +55,14 @@ typedef void (*FUNCPTR_T)(
static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1); static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1);
void bli_packm_unb_var1( obj_t* c, void bli_packm_unb_var1
obj_t* p, (
cntx_t* cntx, obj_t* c,
thrinfo_t* thread ) obj_t* p,
cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* thread
)
{ {
num_t dt_cp = bli_obj_datatype( *c ); num_t dt_cp = bli_obj_datatype( *c );

View File

@@ -32,10 +32,14 @@
*/ */
void bli_packm_unb_var1( obj_t* c, void bli_packm_unb_var1
obj_t* p, (
cntx_t* cntx, obj_t* c,
thrinfo_t* thread ); obj_t* p,
cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* thread
);
#undef GENTPROT #undef GENTPROT

View File

@@ -32,6 +32,5 @@
*/ */
void bli_trsm_cntx_init( void ); #include "bli_scalm_cntl.h"
void bli_trsm_cntx_finalize( void );

View File

@@ -34,38 +34,25 @@
#include "blis.h" #include "blis.h"
scalm_t* scalm_cntl = NULL; cntl_t* bli_scalm_cntl_obj_create
(
void bli_scalm_cntl_init() void* var_func,
cntl_t* sub_node
)
{ {
scalm_cntl = bli_scalm_cntl_obj_create( BLIS_UNBLOCKED, cntl_t* cntl;
BLIS_VARIANT1 );
}
void bli_scalm_cntl_finalize() // It's important that we set the bszid field to BLIS_NO_PART to indicate
{ // that no blocksize partitioning is performed. bli_cntl_free() will rely
bli_cntl_obj_free( scalm_cntl ); // on this information to know how to step through the thrinfo_t tree in
} // sync with the cntl_t tree.
cntl = bli_cntl_obj_create
(
scalm_t* bli_scalm_cntl_obj_create( impl_t impl_type, BLIS_NO_PART,
varnum_t var_num ) var_func,
{ NULL,
scalm_t* cntl; sub_node
);
cntl = ( scalm_t* ) bli_malloc_intl( sizeof(scalm_t) );
cntl->impl_type = impl_type;
cntl->var_num = var_num;
return cntl; return cntl;
} }
void bli_scalm_cntl_obj_init( scalm_t* cntl,
impl_t impl_type,
varnum_t var_num )
{
cntl->impl_type = impl_type;
cntl->var_num = var_num;
}

View File

@@ -32,20 +32,9 @@
*/ */
struct scalm_s
{
impl_t impl_type;
varnum_t var_num;
};
typedef struct scalm_s scalm_t;
#define bli_cntl_sub_scalm( cntl ) cntl->sub_scalm
void bli_scalm_cntl_init( void );
void bli_scalm_cntl_finalize( void );
scalm_t* bli_scalm_cntl_obj_create( impl_t impl_type,
varnum_t var_num );
void bli_scalm_cntl_obj_init( scalm_t* cntl,
impl_t impl_type,
varnum_t var_num );
cntl_t* bli_scalm_cntl_obj_create
(
void* var_func,
cntl_t* sub_node
);

View File

@@ -37,8 +37,7 @@
#include "bli_unpackm_int.h" #include "bli_unpackm_int.h"
#include "bli_unpackm_unb_var1.h" #include "bli_unpackm_unb_var1.h"
//#include "bli_unpackm_blk_var1.h"
#include "bli_unpackm_blk_var2.h" #include "bli_unpackm_blk_var1.h"
#include "bli_unpackm_cxk.h" #include "bli_unpackm_cxk.h"

View File

@@ -52,13 +52,17 @@ typedef void (*FUNCPTR_T)(
cntx_t* cntx cntx_t* cntx
); );
static FUNCPTR_T GENARRAY(ftypes,unpackm_blk_var2); static FUNCPTR_T GENARRAY(ftypes,unpackm_blk_var1);
void bli_unpackm_blk_var2( obj_t* p, void bli_unpackm_blk_var1
obj_t* c, (
cntx_t* cntx, obj_t* p,
unpackm_t* cntl ) obj_t* c,
cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* thread
)
{ {
num_t dt_cp = bli_obj_datatype( *c ); num_t dt_cp = bli_obj_datatype( *c );
@@ -266,5 +270,5 @@ void PASTEMAC(ch,varname) \
\ \
} }
INSERT_GENTFUNC_BASIC0( unpackm_blk_var2 ) INSERT_GENTFUNC_BASIC0( unpackm_blk_var1 )

View File

@@ -32,14 +32,35 @@
*/ */
#define bli_thrinfo_sub_self( thread ) thread->sub_l3op void bli_unpackm_blk_var1
#define bli_thrinfo_sub_opackm( thread ) thread->opackm (
#define bli_thrinfo_sub_ipackm( thread ) thread->ipackm obj_t* p,
obj_t* c,
cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* thread
);
#define trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
#define trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
#define trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
#define trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
//thrinfo_t** bli_trmm_thrinfo_create_paths( bool_t jc_dependency ); #undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
struc_t strucc, \
doff_t diagoffc, \
diag_t diagc, \
uplo_t uploc, \
trans_t transc, \
dim_t m, \
dim_t n, \
dim_t m_panel, \
dim_t n_panel, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p, \
void* c, inc_t rs_c, inc_t cs_c, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( unpackm_blk_var1 )

View File

@@ -34,10 +34,12 @@
#include "blis.h" #include "blis.h"
void bli_unpackm_check( obj_t* p, void bli_unpackm_int_check
obj_t* a, (
cntx_t* cntx, obj_t* p,
unpackm_t* cntl ) obj_t* a,
cntx_t* cntx
)
{ {
err_t e_val; err_t e_val;

View File

@@ -32,7 +32,10 @@
*/ */
void bli_unpackm_check( obj_t* p, void bli_unpackm_int_check
obj_t* a, (
cntx_t* cntx, obj_t* p,
unpackm_t* cntl ); obj_t* a,
cntx_t* cntx
);

View File

@@ -34,42 +34,35 @@
#include "blis.h" #include "blis.h"
unpackm_t* unpackm_cntl = NULL; cntl_t* bli_unpackm_cntl_obj_create
(
void bli_unpackm_cntl_init() void* var_func,
void* unpackm_var_func,
cntl_t* sub_node
)
{ {
unpackm_cntl = bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, cntl_t* cntl;
BLIS_VARIANT1, unpackm_params_t* params;
NULL ); // no blocksize needed
}
void bli_unpackm_cntl_finalize() // Allocate an unpackm_params_t struct.
{ params = bli_malloc_intl( sizeof( unpackm_params_t ) );
bli_cntl_obj_free( unpackm_cntl );
}
unpackm_t* bli_unpackm_cntl_obj_create( impl_t impl_type, // Initialize the unpackm_params_t struct.
varnum_t var_num, params->size = sizeof( unpackm_params_t );
blksz_t* b ) params->var_func = unpackm_var_func;
{
unpackm_t* cntl;
cntl = ( unpackm_t* ) bli_malloc_intl( sizeof(unpackm_t) ); // It's important that we set the bszid field to BLIS_NO_PART to indicate
// that no blocksize partitioning is performed. bli_cntl_free() will rely
cntl->impl_type = impl_type; // on this information to know how to step through the thrinfo_t tree in
cntl->var_num = var_num; // sync with the cntl_t tree.
cntl->b = b; cntl = bli_cntl_obj_create
(
BLIS_NO_PART,
var_func,
params,
sub_node
);
return cntl; return cntl;
} }
void bli_unpackm_cntl_obj_init( unpackm_t* cntl,
impl_t impl_type,
varnum_t var_num,
blksz_t* b )
{
cntl->impl_type = impl_type;
cntl->var_num = var_num;
cntl->b = b;
}

View File

@@ -32,28 +32,23 @@
*/ */
struct unpackm_s struct unpackm_params_s
{ {
impl_t impl_type; uint64_t size; // size field must be present and come first.
varnum_t var_num; unpackm_voft var_func;
blksz_t* b;
}; };
typedef struct unpackm_s unpackm_t; typedef struct unpackm_params_s unpackm_params_t;
#define bli_cntl_sub_unpackm( cntl ) cntl->sub_unpackm #define bli_cntl_unpackm_params_var_func( cntl ) \
#define bli_cntl_sub_unpackm_a( cntl ) cntl->sub_unpackm_a \
#define bli_cntl_sub_unpackm_a11( cntl ) cntl->sub_unpackm_a11 ( ( (unpackm_params_t*)(cntl)->params )->var_func )
#define bli_cntl_sub_unpackm_b( cntl ) cntl->sub_unpackm_b
#define bli_cntl_sub_unpackm_b11( cntl ) cntl->sub_unpackm_b11 // -----------------------------------------------------------------------------
#define bli_cntl_sub_unpackm_c( cntl ) cntl->sub_unpackm_c
#define bli_cntl_sub_unpackm_c11( cntl ) cntl->sub_unpackm_c11 cntl_t* bli_unpackm_cntl_obj_create
(
void* var_func,
void* unpackm_var_func,
cntl_t* sub_node
);
void bli_unpackm_cntl_init( void );
void bli_unpackm_cntl_finalize( void );
unpackm_t* bli_unpackm_cntl_obj_create( impl_t impl_type,
varnum_t var_num,
blksz_t* b );
void bli_unpackm_cntl_obj_init( unpackm_t* cntl,
impl_t impl_type,
varnum_t var_num,
blksz_t* b );

View File

@@ -152,15 +152,16 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
#undef GENTFUNC #undef GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \ #define GENTFUNC( ctype, ch, opname ) \
\ \
void PASTEMAC(ch,opname)( \ void PASTEMAC(ch,opname) \
conj_t conjp, \ ( \
dim_t m, \ conj_t conjp, \
dim_t n, \ dim_t m, \
void* beta, \ dim_t n, \
void* p, inc_t ldp, \ void* beta, \
void* a, inc_t inca, inc_t lda, \ void* p, inc_t ldp, \
cntx_t* cntx \ void* a, inc_t inca, inc_t lda, \
) \ cntx_t* cntx \
) \
{ \ { \
dim_t panel_dim; \ dim_t panel_dim; \
num_t dt; \ num_t dt; \

View File

@@ -34,188 +34,43 @@
#include "blis.h" #include "blis.h"
#define FUNCPTR_T unpackm_fp void bli_unpackm_int
(
typedef void (*FUNCPTR_T)( obj_t* p, obj_t* p,
obj_t* a, obj_t* a,
cntx_t* cntx, cntx_t* cntx,
unpackm_t* cntl ); cntl_t* cntl,
thrinfo_t* thread
static FUNCPTR_T vars[2][3] = )
{ {
// unblocked optimized unblocked blocked unpackm_voft f;
{ bli_unpackm_unb_var1, NULL, NULL, },
{ NULL, NULL, bli_unpackm_blk_var2, },
};
void bli_unpackm_int( obj_t* p, // Check parameters.
obj_t* a, if ( bli_error_checking_is_enabled() )
cntx_t* cntx, bli_unpackm_int_check( p, a, cntx );
unpackm_t* cntl,
thrinfo_t* thread )
{
// The unpackm operation consists of an optional post-process: castm.
// (This post-process is analogous to the castm pre-process in packm.)
// Here are the following possible ways unpackm can execute:
// 1. unpack and cast: Unpack to a temporary matrix c and then cast
// c to a.
// 2. unpack only: Unpack directly to matrix a since typecasting is
// not needed.
// 3. cast only: Not yet supported / not used.
// 4. no-op: The control tree directs us to skip the unpack operation
// entirely. No action is taken.
obj_t c;
varnum_t n;
impl_t i;
FUNCPTR_T f;
// Sanity check; A should never have a zero dimension. If we must support
// it, then we should fold it into the next alias-and-early-exit block.
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
// First check if we are to skip this operation because the control tree
// is NULL, and if so, simply return.
if ( bli_cntl_is_noop( cntl ) )
{
return;
}
// If p was aliased to a during the pack stage (because it was already // If p was aliased to a during the pack stage (because it was already
// in an acceptable packed/contiguous format), then no unpack is actually // in an acceptable packed/contiguous format), then no unpack is actually
// necessary, so we return. // necessary, so we return.
if ( bli_obj_is_alias_of( *p, *a ) ) if ( bli_obj_is_alias_of( *p, *a ) ) return;
{
return;
}
// Check parameters. // Extract the function pointer from the current control tree node.
if ( bli_error_checking_is_enabled() ) f = bli_cntl_unpackm_params_var_func( cntl );
bli_unpackm_check( p, a, cntx, cntl );
// Now, if we are not skipping the unpack operation, then the only
// question left is whether we are to typecast matrix a after unpacking.
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
bli_abort();
/*
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
{
// Initialize an object c for the intermediate typecast matrix.
bli_unpackm_init_cast( p,
a,
&c );
}
else
*/
{
// If no cast is needed, then aliasing object c to the original
// matrix serves as a minor optimization. This causes the unpackm
// implementation to unpack directly into matrix a.
bli_obj_alias_to( *a, c );
}
// Now we are ready to proceed with the unpacking.
// Extract the variant number and implementation type.
n = bli_cntl_var_num( cntl );
i = bli_cntl_impl_type( cntl );
// Index into the variant array to extract the correct function pointer.
f = vars[n][i];
// Invoke the variant. // Invoke the variant.
if( bli_thread_am_ochief( thread ) ) { if ( bli_thread_am_ochief( thread ) )
f( p,
&c,
cntx,
cntl );
}
bli_thread_obarrier( thread );
// Now, if necessary, we cast the contents of c to matrix a. If casting
// was not necessary, then we are done because the call to the unpackm
// implementation would have unpacked directly to matrix a.
/*
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
{ {
// Copy/typecast matrix c to matrix a. f
// NOTE: Here, we use copynzm instead of copym because, in the cases (
// where we are unpacking/typecasting a real matrix c to a complex p,
// matrix a, we want to touch only the real components of a, rather a,
// than also set the imaginary components to zero. This comes about cntx,
// because of the fact that, if we are unpacking real-to-complex, cntl,
// then it is because all of the computation occurred in the real thread
// domain, and so we would want to leave whatever imaginary values );
// there are in matrix a untouched. Notice that for unpackings that }
// entail complex-to-complex data movements, the copynzm operation
// behaves exactly as copym, so no use cases are lost (at least none
// that I can think of).
bli_copynzm( &c,
a );
// NOTE: The above code/comment is outdated. What should happen is // Barrier so that unpacking is done before computation.
// as follows: bli_thread_obarrier( thread );
// - If dt(a) is complex and dt(p) is real, then create an alias of
// a and then tweak it so that it looks like a real domain object.
// This will involve:
// - projecting the datatype to real domain
// - scaling both the row and column strides by 2
// ALL OF THIS should be done in the front-end, NOT here, as
// unpackm() won't even be needed in that case.
}
*/
} }
/*
void bli_unpackm_init_cast( obj_t* p,
obj_t* a,
obj_t* c )
{
// The idea here is that we want to create an object c that is identical
// to object a, except that:
// (1) the storage datatype of c is equal to the target datatype of a,
// with the element size of c adjusted accordingly,
// (2) the view offset of c is reset to (0,0),
// (3) object c's main buffer is set to a new memory region acquired
// from the memory manager, or extracted from p if a mem entry is
// already available, (After acquring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
// (4) object c is marked as being stored in a standard, contiguous
// format (ie: column-major order).
// Any transposition encoded within object a will also be encoded in
// object c. That way, unpackm handles any needed transposition during
// the unpacking, and the only thing the cast stage needs to do is cast.
num_t dt_targ_a = bli_obj_target_datatype( *a );
dim_t m_a = bli_obj_length( *a );
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
inc_t rs_c, cs_c;
// We begin by copying the basic fields of a.
bli_obj_alias_to( *a, *c );
// Update datatype and element size fields.
bli_obj_set_datatype( dt_targ_a, *c );
bli_obj_set_elem_size( elem_size_c, *c );
// Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *c );
// Check the mem_t entry of p associated with the cast buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache
// it to p. (Otherwise, if it is non-NULL, then memory has already been
// acquired from the memory manager and cached.) We then set the main
// buffer of c to the cached address of the cast memory.
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
// Update the strides. We set the increments to reflect column-major order
// storage. We start the leading dimension out as m(a) and increment it if
// necessary so that the beginning of each column is aligned.
cs_c = bli_align_dim_to_size( m_a, elem_size_c,
BLIS_HEAP_STRIDE_ALIGN_SIZE );
rs_c = 1;
bli_obj_set_strides( rs_c, cs_c, *c );
}
*/

View File

@@ -32,14 +32,12 @@
*/ */
void bli_unpackm_int( obj_t* p, void bli_unpackm_int
obj_t* a, (
cntx_t* cntx, obj_t* p,
unpackm_t* cntl, obj_t* a,
thrinfo_t* thread ); cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* thread
);
/*
void bli_unpackm_init_cast( obj_t* p,
obj_t* a,
obj_t* c );
*/

View File

@@ -50,10 +50,14 @@ typedef void (*FUNCPTR_T)(
static FUNCPTR_T GENARRAY(ftypes,unpackm_unb_var1); static FUNCPTR_T GENARRAY(ftypes,unpackm_unb_var1);
void bli_unpackm_unb_var1( obj_t* p, void bli_unpackm_unb_var1
obj_t* c, (
cntx_t* cntx, obj_t* p,
unpackm_t* cntl ) obj_t* c,
cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* thread
)
{ {
num_t dt_pc = bli_obj_datatype( *p ); num_t dt_pc = bli_obj_datatype( *p );

View File

@@ -32,10 +32,14 @@
*/ */
void bli_unpackm_unb_var1( obj_t* p, void bli_unpackm_unb_var1
obj_t* c, (
cntx_t* cntx, obj_t* p,
unpackm_t* cntl ); obj_t* c,
cntx_t* cntx,
cntl_t* cntl,
thrinfo_t* thread
);
#undef GENTPROT #undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \ #define GENTPROT( ctype, ch, varname ) \

View File

@@ -32,9 +32,10 @@
*/ */
#include "bli_gemv_cntl.h" // NOTE: level-2 control tree code is temporarily disabled.
#include "bli_gemv_front.h" //#include "bli_gemv_cntl.h"
#include "bli_gemv_int.h" //#include "bli_gemv_front.h"
//#include "bli_gemv_int.h"
#include "bli_gemv_var.h" #include "bli_gemv_var.h"

View File

@@ -48,7 +48,7 @@ void PASTEMAC0(opname) \
obj_t* beta, \ obj_t* beta, \
obj_t* y, \ obj_t* y, \
cntx_t* cntx, \ cntx_t* cntx, \
gemv_t* cntl \ cntl_t* cntl \
); );
GENPROT( gemv_blk_var1 ) GENPROT( gemv_blk_var1 )

View File

@@ -45,7 +45,7 @@ void PASTEMAC0(opname) \
obj_t* beta, \ obj_t* beta, \
obj_t* y, \ obj_t* y, \
cntx_t* cntx, \ cntx_t* cntx, \
gemv_t* cntl \ cntl_t* cntl \
) \ ) \
{ \ { \
num_t dt = bli_obj_datatype( *a ); \ num_t dt = bli_obj_datatype( *a ); \

View File

@@ -34,43 +34,64 @@
#include "blis.h" #include "blis.h"
void bli_trsm_cntx_init( cntx_t* cntx ) #undef GENFRONT
{ #define GENFRONT( ftname, opname ) \
// Perform basic setup on the context. \
bli_cntx_obj_create( cntx ); /*static gemv_vft GENARRAY(ftypes,gemv_unb_var1);*/ \
static GENARRAY_VFP(ftname,opname); \
\
void PASTEMAC0(opname) \
( \
obj_t* alpha, \
obj_t* a, \
obj_t* x, \
obj_t* beta, \
obj_t* y, \
cntx_t* cntx, \
gemv_t* cntl \
) \
{ \
num_t dt = bli_obj_datatype( *a ); \
\
trans_t transa = bli_obj_conjtrans_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
\
dim_t m = bli_obj_length( *a ); \
dim_t n = bli_obj_width( *a ); \
\
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
\
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
\
PASTECH(ftname,_vft) f = PASTECH(opname,_vfp)[dt]; \
\
/* Invoke the void pointer-based function for the given datatype. */ \
f( \
transa, \
conjx, \
m, \
n, \
buf_alpha, \
buf_a, rs_a, cs_a, \
buf_x, incx, \
buf_beta, \
buf_y, incy, \
cntx \
); \
} \
// Initialize the context with the current architecture's native GENFRONT( gemv, gemv_unb_var1 )
// level-3 gemm micro-kernel, and its output preferences. GENFRONT( gemv, gemv_unb_var2 )
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMM_UKR, cntx );
bli_gks_cntx_set_l3_nat_ukr_prefs( BLIS_GEMM_UKR, cntx );
// Initialize the context with the current architecture's native GENFRONT( gemv, gemv_unf_var1 )
// level-3 trsm micro-kernels. GENFRONT( gemv, gemv_unf_var2 )
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_L_UKR, cntx );
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_U_UKR, cntx );
bli_gks_cntx_set_l3_nat_ukr( BLIS_TRSM_L_UKR, cntx );
bli_gks_cntx_set_l3_nat_ukr( BLIS_TRSM_U_UKR, cntx );
// Initialize the context with the current architecture's register
// and cache blocksizes (and multiples), given the execution method.
bli_gks_cntx_set_blkszs( BLIS_NAT, 6,
BLIS_NC, BLIS_NR,
BLIS_KC, BLIS_KR,
BLIS_MC, BLIS_MR,
BLIS_NR, BLIS_NR,
BLIS_MR, BLIS_MR,
BLIS_KR, BLIS_KR,
cntx );
// Set the pack_t schemas for native execution.
bli_cntx_set_pack_schema_ab( BLIS_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS,
cntx );
}
void bli_trsm_cntx_finalize( cntx_t* cntx )
{
// Free the context and all memory allocated to it.
bli_cntx_obj_free( cntx );
}

View File

@@ -32,8 +32,9 @@
*/ */
#include "bli_ger_cntl.h" // NOTE: level-2 control tree code is temporarily disabled.
#include "bli_ger_front.h" //#include "bli_ger_cntl.h"
#include "bli_ger_int.h" //#include "bli_ger_front.h"
//#include "bli_ger_int.h"
#include "bli_ger_var.h" #include "bli_ger_var.h"

View File

@@ -47,7 +47,7 @@ void PASTEMAC0(opname) \
obj_t* y, \ obj_t* y, \
obj_t* a, \ obj_t* a, \
cntx_t* cntx, \ cntx_t* cntx, \
ger_t* cntl \ cntl_t* cntl \
); );
GENPROT( ger_blk_var1 ) GENPROT( ger_blk_var1 )

View File

@@ -44,7 +44,7 @@ void PASTEMAC0(opname) \
obj_t* y, \ obj_t* y, \
obj_t* a, \ obj_t* a, \
cntx_t* cntx, \ cntx_t* cntx, \
ger_t* cntl \ cntl_t* cntl \
) \ ) \
{ \ { \
num_t dt = bli_obj_datatype( *a ); \ num_t dt = bli_obj_datatype( *a ); \

Some files were not shown because too many files have changed in this diff Show More