mirror of
https://github.com/amd/blis.git
synced 2026-05-11 01:30:00 +00:00
conflicts merge for bli_kernel.h
Change-Id: I15d846bd34e11f86ebfd7ed091ff671a1f3366a0
This commit is contained in:
15
README.md
15
README.md
@@ -7,16 +7,17 @@ Introduction
|
||||
------------
|
||||
|
||||
BLIS is a portable software framework for instantiating high-performance
|
||||
BLAS-like dense linear algebra libraries. The framework was designed to
|
||||
isolate essential kernels of computation that, when optimized, immediately
|
||||
enable optimized implementations of most of its commonly used and
|
||||
computationally intensive operations. BLIS is written in [ISO
|
||||
BLAS-like dense linear algebra libraries. The framework was designed to isolate
|
||||
essential kernels of computation that, when optimized, immediately enable
|
||||
optimized implementations of most of its commonly used and computationally
|
||||
intensive operations. BLIS is written in [ISO
|
||||
C99](http://en.wikipedia.org/wiki/C99) and available under a
|
||||
[new/modified/3-clause BSD
|
||||
license](http://opensource.org/licenses/BSD-3-Clause). While BLIS exports a
|
||||
[new BLAS-like API](), it also includes a BLAS compatibility layer which gives
|
||||
application developers access to BLIS implementations via traditional [BLAS
|
||||
routine calls](http://www.netlib.org/lapack/lug/node145.html).
|
||||
[new BLAS-like API](https://github.com/flame/blis/wiki/BLISAPIQuickReference),
|
||||
it also includes a BLAS compatibility layer which gives application developers
|
||||
access to BLIS implementations via traditional [BLAS routine
|
||||
calls](http://www.netlib.org/lapack/lug/node145.html).
|
||||
|
||||
For a thorough presentation of our framework, please read our recently accepted
|
||||
journal article, ["BLIS: A Framework for Rapidly Instantiating BLAS
|
||||
|
||||
@@ -125,6 +125,18 @@
|
||||
#define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS
|
||||
#endif
|
||||
|
||||
// zgemm micro-kernel
|
||||
|
||||
#if 1
|
||||
#define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4
|
||||
#define BLIS_DEFAULT_MC_Z 72
|
||||
#define BLIS_DEFAULT_KC_Z 256
|
||||
#define BLIS_DEFAULT_NC_Z 4080
|
||||
#define BLIS_DEFAULT_MR_Z 3
|
||||
#define BLIS_DEFAULT_NR_Z 4
|
||||
|
||||
#define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS
|
||||
#endif
|
||||
|
||||
// -- trsm-related --
|
||||
|
||||
|
||||
9
configure
vendored
9
configure
vendored
@@ -91,7 +91,7 @@ print_usage()
|
||||
echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
|
||||
echo " "
|
||||
echo " Enable threading in the library, using threading model"
|
||||
echo " MODEL={omp,pthreads,no}. If MODEL=no or "
|
||||
echo " MODEL={openmp,pthreads,no}. If MODEL=no or "
|
||||
echo " --disable-threading is specified, threading will be"
|
||||
echo " disabled. The default is 'no'."
|
||||
echo " "
|
||||
@@ -486,13 +486,18 @@ main()
|
||||
|
||||
|
||||
# Check the threading model flag.
# NOTE: 'omp' is deprecated but still supported; 'openmp' is preferred.
|
||||
enable_openmp='no'
|
||||
enable_openmp_01=0
|
||||
enable_pthreads='no'
|
||||
enable_pthreads_01=0
|
||||
if [ "x${threading_model}" = "xauto" ]; then
|
||||
echo "${script_name}: determining the threading model automatically."
|
||||
elif [ "x${threading_model}" = "xomp" ]; then
|
||||
elif [ "x${threading_model}" = "xopenmp" ] ||
|
||||
[ "x${threading_model}" = "xomp" ]; then
|
||||
echo "${script_name}: using OpenMP for threading."
|
||||
enable_openmp='yes'
|
||||
enable_openmp_01=1
|
||||
|
||||
@@ -99,8 +99,8 @@ void bli_getsc_check
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_noninteger_object( chi );
|
||||
bli_check_error_code( e_val );
|
||||
//e_val = bli_check_noninteger_object( chi );
|
||||
//bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
@@ -125,8 +125,8 @@ void bli_setsc_check
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( chi );
|
||||
bli_check_error_code( e_val );
|
||||
//e_val = bli_check_floating_object( chi );
|
||||
//bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
|
||||
@@ -198,8 +198,8 @@ void PASTEMAC0(opname) \
|
||||
if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; \
|
||||
else dt_use = dt_chi; \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_3 \
|
||||
/* Invoke the typed function (with integer support). */ \
|
||||
bli_call_ft_3i \
|
||||
( \
|
||||
dt_use, \
|
||||
opname, \
|
||||
@@ -229,8 +229,8 @@ void PASTEMAC0(opname) \
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_3 \
|
||||
/* Invoke the typed function (with integer support). */ \
|
||||
bli_call_ft_3i \
|
||||
( \
|
||||
dt_chi, \
|
||||
opname, \
|
||||
|
||||
@@ -227,3 +227,25 @@ void PASTEMAC(ch,opname) \
|
||||
|
||||
INSERT_GENTFUNCR_BASIC0( zipsc )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_igetsc
|
||||
(
|
||||
dim_t* chi,
|
||||
double* zeta_r,
|
||||
double* zeta_i
|
||||
)
|
||||
{
|
||||
PASTEMAC2(i,d,gets)( *chi, *zeta_r, *zeta_i );
|
||||
}
|
||||
|
||||
void bli_isetsc
|
||||
(
|
||||
double zeta_r,
|
||||
double zeta_i,
|
||||
dim_t* chi
|
||||
)
|
||||
{
|
||||
PASTEMAC2(d,i,sets)( zeta_r, zeta_i, *chi );
|
||||
}
|
||||
|
||||
|
||||
@@ -141,3 +141,19 @@ void PASTEMAC(ch,opname) \
|
||||
|
||||
INSERT_GENTPROTR_BASIC( zipsc )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_igetsc
|
||||
(
|
||||
dim_t* chi,
|
||||
double* zeta_r,
|
||||
double* zeta_i
|
||||
);
|
||||
|
||||
void bli_isetsc
|
||||
(
|
||||
double zeta_r,
|
||||
double zeta_i,
|
||||
dim_t* chi
|
||||
);
|
||||
|
||||
|
||||
@@ -46,12 +46,14 @@
|
||||
#include "bli_l1v_tapi.h"
|
||||
|
||||
// Pack-related
|
||||
#include "bli_packv.h"
|
||||
#include "bli_unpackv.h"
|
||||
// NOTE: packv and unpackv are temporarily disabled.
|
||||
//#include "bli_packv.h"
|
||||
//#include "bli_unpackv.h"
|
||||
|
||||
// Other
|
||||
#include "bli_scalv_cntl.h"
|
||||
#include "bli_scalv_int.h"
|
||||
// NOTE: scalv control tree code is temporarily disabled.
|
||||
//#include "bli_scalv_cntl.h"
|
||||
//#include "bli_scalv_int.h"
|
||||
|
||||
// Reference kernel headers
|
||||
#include "bli_l1v_ref.h"
|
||||
|
||||
@@ -56,6 +56,21 @@ GENFRONT( subv )
|
||||
GENFRONT( swapv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* index \
|
||||
) \
|
||||
{ \
|
||||
bli_l1v_xi_check( x, index ); \
|
||||
}
|
||||
|
||||
GENFRONT( amaxv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
@@ -481,3 +496,39 @@ void bli_l1v_ax_check
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_l1v_xi_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* index
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_integer_object( index );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_nonconstant_object( index );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_vector_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_scalar_object( index );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( index );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -52,6 +52,18 @@ GENTPROT( subv )
|
||||
GENTPROT( swapv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* index \
|
||||
);
|
||||
|
||||
GENTPROT( amaxv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
@@ -196,3 +208,9 @@ void bli_l1v_ax_check
|
||||
obj_t* x
|
||||
);
|
||||
|
||||
void bli_l1v_xi_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* index
|
||||
);
|
||||
|
||||
|
||||
@@ -55,6 +55,7 @@ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
|
||||
}
|
||||
|
||||
GENFRONT( addv, BLIS_ADDV_KER )
|
||||
GENFRONT( amaxv, BLIS_AMAXV_KER )
|
||||
GENFRONT( copyv, BLIS_COPYV_KER )
|
||||
GENFRONT( dotv, BLIS_DOTV_KER )
|
||||
GENFRONT( dotxv, BLIS_DOTXV_KER )
|
||||
|
||||
@@ -44,6 +44,7 @@ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ); \
|
||||
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx );
|
||||
|
||||
GENPROT( addv )
|
||||
GENPROT( amaxv )
|
||||
GENPROT( axpbyv )
|
||||
GENPROT( axpyv )
|
||||
GENPROT( copyv )
|
||||
|
||||
@@ -58,6 +58,21 @@ INSERT_GENTDEF( addv )
|
||||
INSERT_GENTDEF( copyv )
|
||||
INSERT_GENTDEF( subv )
|
||||
|
||||
// amaxv
|
||||
|
||||
#undef GENTDEF
|
||||
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||
\
|
||||
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
dim_t* restrict index, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( amaxv )
|
||||
|
||||
// axpbyv
|
||||
|
||||
#undef GENTDEF
|
||||
|
||||
@@ -54,6 +54,20 @@ INSERT_GENTPROT_BASIC( copyv_ker_name )
|
||||
INSERT_GENTPROT_BASIC( subv_ker_name )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
dim_t* restrict index, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( amaxv_ker_name )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
|
||||
@@ -82,6 +82,44 @@ GENFRONT( copyv )
|
||||
GENFRONT( subv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* index \
|
||||
BLIS_OAPI_CNTX_PARAM \
|
||||
) \
|
||||
{ \
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_datatype( *x ); \
|
||||
\
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
\
|
||||
void* buf_index = bli_obj_buffer_at_off( *index ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, index ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_5 \
|
||||
( \
|
||||
dt, \
|
||||
opname, \
|
||||
n, \
|
||||
buf_x, incx, \
|
||||
buf_index, \
|
||||
cntx \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( amaxv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
|
||||
@@ -52,6 +52,19 @@ GENTPROT( copyv )
|
||||
GENTPROT( subv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* index \
|
||||
BLIS_OAPI_CNTX_PARAM \
|
||||
);
|
||||
|
||||
GENTPROT( amaxv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
|
||||
@@ -74,6 +74,38 @@ INSERT_GENTFUNC_BASIC( copyv, BLIS_COPYV_KER )
|
||||
INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, kerid ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
dim_t* index, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
cntx_t* cntx_p; \
|
||||
\
|
||||
bli_cntx_init_local_if( opname, cntx, cntx_p ); \
|
||||
\
|
||||
PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
n, \
|
||||
x, incx, \
|
||||
index, \
|
||||
cntx_p \
|
||||
); \
|
||||
\
|
||||
bli_cntx_finalize_local_if( opname, cntx ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( amaxv, BLIS_AMAXV_KER )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, kerid ) \
|
||||
\
|
||||
|
||||
@@ -40,6 +40,9 @@
|
||||
#undef addv_ker_name
|
||||
#define addv_ker_name addv
|
||||
|
||||
#undef amaxv_ker_name
|
||||
#define amaxv_ker_name amaxv
|
||||
|
||||
#undef axpbyv_ker_name
|
||||
#define axpbyv_ker_name axpbyv
|
||||
|
||||
|
||||
134
frame/1/kernels/bli_amaxv_ref.c
Normal file
134
frame/1/kernels/bli_amaxv_ref.c
Normal file
@@ -0,0 +1,134 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
//
|
||||
// Define BLAS-like interfaces with typed operands.
|
||||
//
|
||||
|
||||
#undef GENTFUNCR
|
||||
#define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
dim_t* i_max, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype_r* minus_one = PASTEMAC(chr,m1); \
|
||||
dim_t* zero_i = PASTEMAC(i,0); \
|
||||
\
|
||||
ctype_r chi1_r; \
|
||||
ctype_r chi1_i; \
|
||||
ctype_r abs_chi1; \
|
||||
ctype_r abs_chi1_max; \
|
||||
dim_t i; \
|
||||
\
|
||||
/* Initialize the index of the maximum absolute value to zero. */ \
|
||||
PASTEMAC(i,copys)( zero_i, *i_max ); \
|
||||
\
|
||||
/* If the vector length is zero, return early. This directly emulates
|
||||
the behavior of netlib BLAS's i?amax() routines. */ \
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
\
|
||||
/* Initialize the maximum absolute value search candidate with
|
||||
-1, which is guaranteed to be less than all values we will
|
||||
compute. */ \
|
||||
PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \
|
||||
\
|
||||
if ( incx == 1 ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
/* Get the real and imaginary components of chi1. */ \
|
||||
PASTEMAC2(ch,chr,gets)( x[i], chi1_r, chi1_i ); \
|
||||
\
|
||||
/* Replace chi1_r and chi1_i with their absolute values. */ \
|
||||
PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
|
||||
PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
|
||||
\
|
||||
/* Add the real and imaginary absolute values together. */ \
|
||||
PASTEMAC(chr,set0s)( abs_chi1 ); \
|
||||
PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
|
||||
PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
|
||||
\
|
||||
/* If the absolute value of the current element exceeds that of
|
||||
the previous largest, save it and its index. If NaN is
|
||||
encountered, then treat it the same as if it were a valid
|
||||
value that was smaller than any previously seen. This
|
||||
behavior mimics that of LAPACK's ?lange(). */ \
|
||||
if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \
|
||||
{ \
|
||||
abs_chi1_max = abs_chi1; \
|
||||
*i_max = i; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
ctype* chi1 = x + (i )*incx; \
|
||||
\
|
||||
/* Get the real and imaginary components of chi1. */ \
|
||||
PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \
|
||||
\
|
||||
/* Replace chi1_r and chi1_i with their absolute values. */ \
|
||||
PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \
|
||||
PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \
|
||||
\
|
||||
/* Add the real and imaginary absolute values together. */ \
|
||||
PASTEMAC(chr,set0s)( abs_chi1 ); \
|
||||
PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \
|
||||
PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \
|
||||
\
|
||||
/* If the absolute value of the current element exceeds that of
|
||||
the previous largest, save it and its index. If NaN is
|
||||
encountered, then treat it the same as if it were a valid
|
||||
value that was smaller than any previously seen. This
|
||||
behavior mimics that of LAPACK's ?lange(). */ \
|
||||
if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \
|
||||
{ \
|
||||
abs_chi1_max = abs_chi1; \
|
||||
*i_max = i; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCR_BASIC0( amaxv_ref )
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#if 0
|
||||
packv_t* packv_cntl = NULL;
|
||||
|
||||
void bli_packv_cntl_init( void )
|
||||
@@ -77,4 +78,41 @@ void bli_packv_cntl_obj_init( packv_t* cntl,
|
||||
cntl->bmid = bmid;
|
||||
cntl->pack_schema = pack_schema;
|
||||
}
|
||||
#endif
|
||||
|
||||
cntl_t* bli_packv_cntl_obj_create
|
||||
(
|
||||
void* var_func,
|
||||
void* packv_var_func,
|
||||
bszid_t bmid,
|
||||
pack_t pack_schema,
|
||||
cntl_t* sub_node
|
||||
)
|
||||
{
|
||||
cntl_t* cntl;
|
||||
packv_params_t* params;
|
||||
|
||||
// Allocate a packv_params_t struct.
|
||||
params = bli_malloc_intl( sizeof( packv_params_t ) );
|
||||
|
||||
// Initialize the packv_params_t struct.
|
||||
params->size = sizeof( packv_params_t );
|
||||
params->packv_var_func = packv_var_func;
|
||||
params->bmid = bmid;
|
||||
params->pack_schema = pack_schema;
|
||||
|
||||
// It's important that we set the bszid field to BLIS_NO_PART to indicate
|
||||
// that no blocksize partitioning is performed. bli_cntl_free() will rely
|
||||
// on this information to know how to step through the thrinfo_t tree in
|
||||
// sync with the cntl_t tree.
|
||||
cntl = bli_cntl_obj_create
|
||||
(
|
||||
BLIS_NO_PART,
|
||||
var_func,
|
||||
params,
|
||||
sub_node
|
||||
);
|
||||
|
||||
return cntl;
|
||||
}
|
||||
|
||||
67
frame/1/other/packv/bli_packv_cntl.h
Normal file
67
frame/1/other/packv/bli_packv_cntl.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
struct packv_params_s
|
||||
{
|
||||
uint64_t size
|
||||
packv_voft* var_func;
|
||||
bszid_t bmid;
|
||||
pack_t pack_schema;
|
||||
};
|
||||
typedef struct packv_params_s packv_params_t;
|
||||
|
||||
|
||||
// Returns the var_func field of the packv_params_t attached to cntl.
// The cast is applied to cntl->params BEFORE the member access; without the
// extra parentheses '->' binds tighter than the cast and would be applied to
// the uncast params pointer.
#define bli_cntl_packv_params_var_func( cntl ) \
\
	( ( (packv_params_t*)(cntl)->params )->var_func )
|
||||
|
||||
// Returns the bmid field of the packv_params_t attached to cntl.
// The struct field is named 'bmid' (there is no 'bmid_m'), and the cast is
// applied to cntl->params before the member access.
#define bli_cntl_packv_params_bmid( cntl ) \
\
	( ( (packv_params_t*)(cntl)->params )->bmid )
|
||||
|
||||
// Returns the pack_schema field of the packv_params_t attached to cntl.
// The cast is applied to cntl->params before the member access; '->' binds
// tighter than a cast, so the inner parentheses are required.
#define bli_cntl_packv_params_pack_schema( cntl ) \
\
	( ( (packv_params_t*)(cntl)->params )->pack_schema )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_packv_cntl_obj_create
|
||||
(
|
||||
void* var_func,
|
||||
void* packv_var_func,
|
||||
bszid_t bmid,
|
||||
pack_t pack_schema,
|
||||
cntl_t* sub_node
|
||||
);
|
||||
|
||||
@@ -52,7 +52,6 @@ void bli_packv_init
|
||||
|
||||
pack_t pack_schema;
|
||||
bszid_t bmult_id;
|
||||
obj_t c;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
@@ -84,26 +83,6 @@ void bli_packv_init
|
||||
// left is whether we are to typecast vector a before packing.
|
||||
if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
|
||||
bli_abort();
|
||||
/*
|
||||
{
|
||||
// Initialize an object c for the intermediate typecast vector.
|
||||
bli_packv_init_cast( a,
|
||||
p,
|
||||
&c );
|
||||
|
||||
// Copy/typecast vector a to vector c.
|
||||
bli_copyv( a,
|
||||
&c );
|
||||
}
|
||||
else
|
||||
*/
|
||||
{
|
||||
// If no cast is needed, then aliasing object c to the original
|
||||
// vector serves as a minor optimization. This causes the packv
|
||||
// implementation to pack directly from vector a.
|
||||
bli_obj_alias_to( *a, c );
|
||||
}
|
||||
|
||||
|
||||
// Extract various fields from the control tree and pass them in
|
||||
// explicitly into _init_pack(). This allows external code generators
|
||||
@@ -116,7 +95,7 @@ void bli_packv_init
|
||||
(
|
||||
pack_schema,
|
||||
bmult_id,
|
||||
&c,
|
||||
&a,
|
||||
p,
|
||||
cntx
|
||||
);
|
||||
@@ -125,22 +104,24 @@ void bli_packv_init
|
||||
}
|
||||
|
||||
|
||||
void bli_packv_init_pack
|
||||
siz_t bli_packv_init_pack
|
||||
(
|
||||
pack_t pack_schema,
|
||||
pack_t schema,
|
||||
bszid_t bmult_id,
|
||||
obj_t* c,
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx
|
||||
)
|
||||
{
|
||||
num_t dt = bli_obj_datatype( *c );
|
||||
dim_t dim_c = bli_obj_vector_dim( *c );
|
||||
num_t dt = bli_obj_datatype( *a );
|
||||
dim_t dim_a = bli_obj_vector_dim( *a );
|
||||
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
|
||||
|
||||
membrk_t* membrk = bli_cntx_membrk( cntx );
|
||||
|
||||
#if 0
|
||||
mem_t* mem_p;
|
||||
#endif
|
||||
dim_t m_p_pad;
|
||||
siz_t size_p;
|
||||
inc_t rs_p, cs_p;
|
||||
@@ -148,21 +129,17 @@ void bli_packv_init_pack
|
||||
|
||||
|
||||
// We begin by copying the basic fields of c.
|
||||
bli_obj_alias_to( *c, *p );
|
||||
bli_obj_alias_to( *a, *p );
|
||||
|
||||
// Update the dimensions.
|
||||
bli_obj_set_dims( dim_c, 1, *p );
|
||||
bli_obj_set_dims( dim_a, 1, *p );
|
||||
|
||||
// Reset the view offsets to (0,0).
|
||||
bli_obj_set_offs( 0, 0, *p );
|
||||
|
||||
// Set the pack schema in the p object to the value in the control tree
|
||||
// node.
|
||||
bli_obj_set_pack_schema( pack_schema, *p );
|
||||
|
||||
// Extract the address of the mem_t object within p that will track
|
||||
// properties of the packed buffer.
|
||||
mem_p = bli_obj_pack_mem( *p );
|
||||
bli_obj_set_pack_schema( schema, *p );
|
||||
|
||||
// Compute the dimensions padded by the dimension multiples.
|
||||
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( *p ), bmult );
|
||||
@@ -170,6 +147,11 @@ void bli_packv_init_pack
|
||||
// Compute the size of the packed buffer.
|
||||
size_p = m_p_pad * 1 * bli_obj_elem_size( *p );
|
||||
|
||||
#if 0
|
||||
// Extract the address of the mem_t object within p that will track
|
||||
// properties of the packed buffer.
|
||||
mem_p = bli_obj_pack_mem( *p );
|
||||
|
||||
if ( bli_mem_is_unalloc( mem_p ) )
|
||||
{
|
||||
// If the mem_t object of p has not yet been allocated, then acquire
|
||||
@@ -192,19 +174,19 @@ void bli_packv_init_pack
|
||||
}
|
||||
}
|
||||
|
||||
// Save the padded (packed) dimensions into the packed object.
|
||||
bli_obj_set_padded_dims( m_p_pad, 1, *p );
|
||||
|
||||
// Grab the buffer address from the mem_t object and copy it to the
|
||||
// main object buffer field. (Sometimes this buffer address will be
|
||||
// copied when the value is already up-to-date, because it persists
|
||||
// in the main object buffer field across loop iterations.)
|
||||
buf = bli_mem_buffer( mem_p );
|
||||
bli_obj_set_buffer( buf, *p );
|
||||
#endif
|
||||
|
||||
// Save the padded (packed) dimensions into the packed object.
|
||||
bli_obj_set_padded_dims( m_p_pad, 1, *p );
|
||||
|
||||
// Set the row and column strides of p based on the pack schema.
|
||||
if ( pack_schema == BLIS_PACKED_VECTOR )
|
||||
if ( schema == BLIS_PACKED_VECTOR )
|
||||
{
|
||||
// Set the strides to reflect a column-stored vector. Note that the
|
||||
// column stride may never be used, and is only useful to determine
|
||||
@@ -215,8 +197,11 @@ void bli_packv_init_pack
|
||||
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
}
|
||||
|
||||
return size_p;
|
||||
}
|
||||
|
||||
#if 0
|
||||
void bli_packv_release
|
||||
(
|
||||
obj_t* p,
|
||||
@@ -226,52 +211,4 @@ void bli_packv_release
|
||||
if ( !bli_cntl_is_noop( cntl ) )
|
||||
bli_obj_release_pack( p );
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
void bli_packv_init_cast( obj_t* a,
|
||||
obj_t* p,
|
||||
obj_t* c )
|
||||
{
|
||||
// The idea here is that we want to create an object c that is identical
|
||||
// to object a, except that:
|
||||
// (1) the storage datatype of c is equal to the target datatype of a,
|
||||
// with the element size of c adjusted accordingly,
|
||||
// (2) object c is marked as being stored in a standard, contiguous
|
||||
// format (ie: a column vector),
|
||||
// (3) the view offset of c is reset to (0,0), and
|
||||
// (4) object c's main buffer is set to a new memory region acquired
|
||||
// from the memory manager, or extracted from p if a mem entry is
|
||||
// already available. (After acquring a mem entry from the memory
|
||||
// manager, it is cached within p for quick access later on.)
|
||||
|
||||
num_t dt_targ_a = bli_obj_target_datatype( *a );
|
||||
dim_t dim_a = bli_obj_vector_dim( *a );
|
||||
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
|
||||
|
||||
// We begin by copying the basic fields of a.
|
||||
bli_obj_alias_to( *a, *c );
|
||||
|
||||
// Update datatype and element size fields.
|
||||
bli_obj_set_datatype( dt_targ_a, *c );
|
||||
bli_obj_set_elem_size( elem_size_c, *c );
|
||||
|
||||
// Update the dimensions.
|
||||
bli_obj_set_dims( dim_a, 1, *c );
|
||||
|
||||
// Reset the view offsets to (0,0).
|
||||
bli_obj_set_offs( 0, 0, *c );
|
||||
|
||||
// Check the mem_t entry of p associated with the cast buffer. If it is
|
||||
// NULL, then acquire memory sufficient to hold the object data and cache
|
||||
// it to p. (Otherwise, if it is non-NULL, then memory has already been
|
||||
// acquired from the memory manager and cached.) We then set the main
|
||||
// buffer of c to the cached address of the cast memory.
|
||||
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
|
||||
|
||||
// Update the strides. We set the increments to reflect a column storage.
|
||||
// Note that the column stride should never be used.
|
||||
bli_obj_set_strides( 1, dim_a, *c );
|
||||
}
|
||||
*/
|
||||
|
||||
#endif
|
||||
@@ -40,23 +40,12 @@ void bli_packv_init
|
||||
packv_t* cntl
|
||||
);
|
||||
|
||||
void bli_packv_init_pack
|
||||
siz_t bli_packv_init_pack
|
||||
(
|
||||
pack_t pack_schema,
|
||||
bszid_t bmult_id,
|
||||
obj_t* c,
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx
|
||||
);
|
||||
|
||||
void bli_packv_release
|
||||
(
|
||||
obj_t* p,
|
||||
packv_t* cntl
|
||||
);
|
||||
|
||||
/*
|
||||
void bli_packv_init_cast( obj_t* a,
|
||||
obj_t* p,
|
||||
obj_t* c );
|
||||
*/
|
||||
@@ -47,27 +47,23 @@ static FUNCPTR_T vars[1][3] =
|
||||
{ bli_packv_unb_var1, NULL, NULL }
|
||||
};
|
||||
|
||||
void bli_packv_int( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
packv_t* cntl )
|
||||
void bli_packv_int
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl
|
||||
)
|
||||
{
|
||||
// The packv operation consists of an optional typecasting pre-process.
|
||||
// Here are the following possible ways packv can execute:
|
||||
// 1. cast and pack: When typecasting and packing are both
|
||||
// precribed, typecast a to temporary vector c and then pack
|
||||
// c to p.
|
||||
// 2. pack only: Typecasting is skipped when it is not needed;
|
||||
// simply pack a directly to p.
|
||||
// 3. cast only: Not yet supported / not used.
|
||||
// 4. no-op: The control tree sometimes directs us to skip the
|
||||
// pack operation entirely. Alias p to a and return.
|
||||
|
||||
//obj_t c;
|
||||
|
||||
#if 0
|
||||
varnum_t n;
|
||||
impl_t i;
|
||||
FUNCPTR_T f;
|
||||
#endif
|
||||
packv_voft f;
|
||||
|
||||
// !!!
|
||||
// DEFINE packv_voft type.
|
||||
// !!!
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "bli_l1m_check.h"
|
||||
|
||||
#include "bli_l1m_ft.h"
|
||||
#include "bli_l1m_voft.h"
|
||||
|
||||
// Prototype object APIs with and without contexts.
|
||||
#include "bli_oapi_w_cntx.h"
|
||||
@@ -51,6 +52,5 @@
|
||||
#include "bli_unpackm.h"
|
||||
|
||||
// Other
|
||||
#include "bli_scalm_cntl.h"
|
||||
#include "bli_scalm_int.h"
|
||||
#include "bli_scalm.h"
|
||||
|
||||
|
||||
75
frame/1m/bli_l1m_voft.h
Normal file
75
frame/1m/bli_l1m_voft.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_L1M_VAR_OFT_H
|
||||
#define BLIS_L1M_VAR_OFT_H
|
||||
|
||||
|
||||
//
|
||||
// -- Level-1m variant function types ------------------------------------------
|
||||
//
|
||||
|
||||
// GENTDEF( opname ) declares a function-pointer typedef whose name is built
// from opname and the suffix _voft (presumably by token-pasting via PASTECH;
// confirm against the macro's definition). The pointed-to function returns
// void and takes, in order: the source object a, the packed object p, the
// context, the control tree node, and the thread info.
// It is instantiated below for packm only.
#undef GENTDEF
|
||||
#define GENTDEF( opname ) \
|
||||
\
|
||||
typedef void (*PASTECH(opname,_voft)) \
|
||||
( \
|
||||
obj_t* a, \
|
||||
obj_t* p, \
|
||||
cntx_t* cntx, \
|
||||
cntl_t* cntl, \
|
||||
thrinfo_t* thread \
|
||||
);
|
||||
|
||||
GENTDEF( packm )
|
||||
|
||||
|
||||
// Redefine GENTDEF for unpackm. The generated function-pointer typedef has
// the same shape as the packm one (void return; cntx, cntl and thread
// trailing arguments), but the first two object arguments are declared in
// the opposite order: p first, then a.
// It is instantiated below for unpackm only.
#undef GENTDEF
|
||||
#define GENTDEF( opname ) \
|
||||
\
|
||||
typedef void (*PASTECH(opname,_voft)) \
|
||||
( \
|
||||
obj_t* p, \
|
||||
obj_t* a, \
|
||||
cntx_t* cntx, \
|
||||
cntl_t* cntl, \
|
||||
thrinfo_t* thread \
|
||||
);
|
||||
|
||||
GENTDEF( unpackm )
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -93,10 +93,14 @@ static func_t packm_struc_cxk_kers[BLIS_NUM_PACK_SCHEMA_TYPES] =
|
||||
};
|
||||
|
||||
|
||||
void bli_packm_blk_var1( obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
thrinfo_t* t )
|
||||
void bli_packm_blk_var1
|
||||
(
|
||||
obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* t
|
||||
)
|
||||
{
|
||||
num_t dt_cp = bli_obj_datatype( *c );
|
||||
|
||||
@@ -140,7 +144,7 @@ void bli_packm_blk_var1( obj_t* c,
|
||||
// whether we are executing an induced method.
|
||||
if ( bli_is_nat_packed( schema ) )
|
||||
{
|
||||
// This branch if for native execution, where we assume that
|
||||
// This branch is for native execution, where we assume that
|
||||
// the micro-kernel will always apply the alpha scalar of the
|
||||
// higher-level operation. Thus, we use BLIS_ONE for kappa so
|
||||
// that the underlying packm implementation does not perform
|
||||
@@ -156,28 +160,25 @@ void bli_packm_blk_var1( obj_t* c,
|
||||
// real domain micro-kernels. (In the aforementioned situation,
|
||||
// applying a real scalar is easy, but applying a complex one is
|
||||
// harder, so we avoid the need altogether with the code below.)
|
||||
if( bli_thread_am_ochief( t ) )
|
||||
if ( bli_obj_scalar_has_nonzero_imag( p ) )
|
||||
{
|
||||
if ( bli_obj_scalar_has_nonzero_imag( p ) )
|
||||
{
|
||||
//printf( "applying non-zero imag kappa\n" );
|
||||
// Detach the scalar.
|
||||
bli_obj_scalar_detach( p, &kappa );
|
||||
//printf( "applying non-zero imag kappa\n" );
|
||||
|
||||
// Reset the attached scalar (to 1.0).
|
||||
bli_obj_scalar_reset( p );
|
||||
// Detach the scalar.
|
||||
bli_obj_scalar_detach( p, &kappa );
|
||||
|
||||
kappa_p = κ
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the internal scalar of A has only a real component, then
|
||||
// we will apply it later (in the micro-kernel), and so we will
|
||||
// use BLIS_ONE to indicate no scaling during packing.
|
||||
kappa_p = &BLIS_ONE;
|
||||
}
|
||||
// Reset the attached scalar (to 1.0).
|
||||
bli_obj_scalar_reset( p );
|
||||
|
||||
kappa_p = κ
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the internal scalar of A has only a real component, then
|
||||
// we will apply it later (in the micro-kernel), and so we will
|
||||
// use BLIS_ONE to indicate no scaling during packing.
|
||||
kappa_p = &BLIS_ONE;
|
||||
}
|
||||
kappa_p = bli_thread_obroadcast( t, kappa_p );
|
||||
|
||||
// Acquire the buffer to the kappa chosen above.
|
||||
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
|
||||
@@ -194,7 +195,12 @@ void bli_packm_blk_var1( obj_t* c,
|
||||
bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers;
|
||||
else packm_kers = packm_struc_cxk_kers;
|
||||
#else
|
||||
func_t* cntx_packm_kers = bli_cntx_get_packm_ukr( cntx );
|
||||
// The original idea here was to read the packm_ukr from the context
|
||||
// if it is non-NULL. The problem is, it requires that we be able to
|
||||
// assume that the packm_ukr field is initialized to NULL, which it
|
||||
// currently is not.
|
||||
|
||||
//func_t* cntx_packm_kers = bli_cntx_get_packm_ukr( cntx );
|
||||
|
||||
//if ( bli_func_is_null_dt( dt_cp, cntx_packm_kers ) )
|
||||
{
|
||||
@@ -203,7 +209,6 @@ void bli_packm_blk_var1( obj_t* c,
|
||||
// we use the default lookup table to determine the right func_t
|
||||
// for the current schema.
|
||||
const dim_t i = bli_pack_schema_index( schema );
|
||||
//printf( "bli_packm_blk_var1: pack schema index = %lu (schema = %x)\n", i, schema );
|
||||
|
||||
packm_kers = &packm_struc_cxk_kers[ i ];
|
||||
}
|
||||
@@ -221,11 +226,6 @@ void bli_packm_blk_var1( obj_t* c,
|
||||
// Query the datatype-specific function pointer from the func_t object.
|
||||
packm_ker = bli_func_get_dt( dt_cp, packm_kers );
|
||||
|
||||
|
||||
//bli_cntx_print( cntx );
|
||||
//printf( "bli_packm_blk_var1: packm_ker = %p\n", packm_ker );
|
||||
//printf( "bli_packm_blk_var1: cntx_packm_ker = %p\n", cntx_packm_kers );
|
||||
//printf( "bli_packm_blk_var1: local_table_entry = %p\n", &packm_struc_cxk_kers[ bli_pack_schema_index( schema ) ] );
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
f = ftypes[dt_cp];
|
||||
@@ -598,6 +598,57 @@ PASTEMAC(ch,fprintm)( stdout, "packm_var2: a", m, n, \
|
||||
p_inc = ps_p; \
|
||||
} \
|
||||
\
|
||||
/*
|
||||
if ( col_stored ) { \
|
||||
if ( bli_thread_work_id( thread ) == 0 ) \
|
||||
{ \
|
||||
printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
|
||||
fflush( stdout ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \
|
||||
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \
|
||||
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
|
||||
fflush( stdout ); \
|
||||
} \
|
||||
bli_thread_obarrier( thread ); \
|
||||
if ( bli_thread_work_id( thread ) == 1 ) \
|
||||
{ \
|
||||
printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
|
||||
fflush( stdout ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \
|
||||
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \
|
||||
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
|
||||
fflush( stdout ); \
|
||||
} \
|
||||
bli_thread_obarrier( thread ); \
|
||||
} \
|
||||
else { \
|
||||
if ( bli_thread_work_id( thread ) == 0 ) \
|
||||
{ \
|
||||
printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
|
||||
fflush( stdout ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \
|
||||
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \
|
||||
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
|
||||
fflush( stdout ); \
|
||||
} \
|
||||
bli_thread_obarrier( thread ); \
|
||||
if ( bli_thread_work_id( thread ) == 1 ) \
|
||||
{ \
|
||||
printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \
|
||||
fflush( stdout ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \
|
||||
( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \
|
||||
( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \
|
||||
fflush( stdout ); \
|
||||
} \
|
||||
bli_thread_obarrier( thread ); \
|
||||
} \
|
||||
*/ \
|
||||
\
|
||||
/*
|
||||
if ( bli_is_4mi_packed( schema ) ) { \
|
||||
printf( "packm_var2: is_p_use = %lu\n", is_p_use ); \
|
||||
|
||||
@@ -32,10 +32,14 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_blk_var1( obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
thrinfo_t* t );
|
||||
void bli_packm_blk_var1
|
||||
(
|
||||
obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* t
|
||||
);
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
|
||||
@@ -35,9 +35,12 @@
|
||||
#include "blis.h"
|
||||
|
||||
|
||||
void bli_packm_init_check( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx )
|
||||
void bli_packm_init_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
@@ -54,9 +57,12 @@ void bli_packm_init_check( obj_t* a,
|
||||
//bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_packm_int_check( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx )
|
||||
void bli_packm_int_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
|
||||
@@ -32,10 +32,17 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_init_check( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx );
|
||||
void bli_packm_init_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx
|
||||
);
|
||||
|
||||
void bli_packm_int_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx
|
||||
);
|
||||
|
||||
void bli_packm_int_check( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx );
|
||||
|
||||
@@ -34,109 +34,49 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
packm_t* packm_cntl_row = NULL;
|
||||
packm_t* packm_cntl_col = NULL;
|
||||
|
||||
packm_t* packm_cntl = NULL;
|
||||
|
||||
void bli_packm_cntl_init()
|
||||
cntl_t* bli_packm_cntl_obj_create
|
||||
(
|
||||
void* var_func,
|
||||
void* packm_var_func,
|
||||
bszid_t bmid_m,
|
||||
bszid_t bmid_n,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
bool_t rev_iter_if_lower,
|
||||
pack_t pack_schema,
|
||||
packbuf_t pack_buf_type,
|
||||
cntl_t* sub_node
|
||||
)
|
||||
{
|
||||
// Generally speaking, the BLIS_PACKED_ROWS and BLIS_PACKED_COLUMNS
|
||||
// are used by the level-2 operations. These schemas amount to simple
|
||||
// copies to row or column storage. These simple schemas may be used
|
||||
// by level-3 operations, but they should never be used for matrices
|
||||
// with structure (since they do not densify).
|
||||
// The BLIS_PACKED_ROW_PANELS and BLIS_PACKED_COL_PANELS schemas are
|
||||
// used only in level-3 operations. They pack to (typically) skinny
|
||||
// row and column panels, where the width of the panel is determined
|
||||
// by register blocksizes. It is assumed that matrices with structure
|
||||
// will be densified.
|
||||
cntl_t* cntl;
|
||||
packm_params_t* params;
|
||||
|
||||
// Create control trees to pack by rows.
|
||||
packm_cntl_row
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1, // When packing to rows:
|
||||
BLIS_VF, // used for m dimension
|
||||
BLIS_VF, // used for n dimension
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // do NOT iterate backwards if upper
|
||||
FALSE, // do NOT iterate backwards if lower
|
||||
BLIS_PACKED_ROWS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
// Allocate a packm_params_t struct.
|
||||
params = bli_malloc_intl( sizeof( packm_params_t ) );
|
||||
|
||||
// Initialize the packm_params_t struct.
|
||||
params->size = sizeof( packm_params_t );
|
||||
params->var_func = packm_var_func;
|
||||
params->bmid_m = bmid_m;
|
||||
params->bmid_n = bmid_n;
|
||||
params->does_invert_diag = does_invert_diag;
|
||||
params->rev_iter_if_upper = rev_iter_if_upper;
|
||||
params->rev_iter_if_lower = rev_iter_if_lower;
|
||||
params->pack_schema = pack_schema;
|
||||
params->pack_buf_type = pack_buf_type;
|
||||
|
||||
// Create control trees to pack by columns.
|
||||
packm_cntl_col
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1, // When packing to columns:
|
||||
BLIS_VF, // used for m dimension
|
||||
BLIS_VF, // used for n dimension
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // do NOT iterate backwards if upper
|
||||
FALSE, // do NOT iterate backwards if lower
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
|
||||
|
||||
// Set defaults when we don't care whether the packing is by rows or
|
||||
// by columns.
|
||||
packm_cntl = packm_cntl_col;
|
||||
}
|
||||
|
||||
void bli_packm_cntl_finalize()
|
||||
{
|
||||
bli_cntl_obj_free( packm_cntl_row );
|
||||
bli_cntl_obj_free( packm_cntl_col );
|
||||
}
|
||||
|
||||
packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
bszid_t bmid_m,
|
||||
bszid_t bmid_n,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
bool_t rev_iter_if_lower,
|
||||
pack_t pack_schema,
|
||||
packbuf_t pack_buf_type )
|
||||
{
|
||||
packm_t* cntl;
|
||||
|
||||
cntl = ( packm_t* ) bli_malloc_intl( sizeof(packm_t) );
|
||||
|
||||
cntl->impl_type = impl_type;
|
||||
cntl->var_num = var_num;
|
||||
cntl->bmid_m = bmid_m;
|
||||
cntl->bmid_n = bmid_n;
|
||||
cntl->does_invert_diag = does_invert_diag;
|
||||
cntl->rev_iter_if_upper = rev_iter_if_upper;
|
||||
cntl->rev_iter_if_lower = rev_iter_if_lower;
|
||||
cntl->pack_schema = pack_schema;
|
||||
cntl->pack_buf_type = pack_buf_type;
|
||||
// It's important that we set the bszid field to BLIS_NO_PART to indicate
|
||||
// that no blocksize partitioning is performed. bli_cntl_free() will rely
|
||||
// on this information to know how to step through the thrinfo_t tree in
|
||||
// sync with the cntl_t tree.
|
||||
cntl = bli_cntl_obj_create
|
||||
(
|
||||
BLIS_NO_PART,
|
||||
var_func,
|
||||
params,
|
||||
sub_node
|
||||
);
|
||||
|
||||
return cntl;
|
||||
}
|
||||
|
||||
void bli_packm_cntl_obj_init( packm_t* cntl,
|
||||
impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
bszid_t bmid_m,
|
||||
bszid_t bmid_n,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
bool_t rev_iter_if_lower,
|
||||
pack_t pack_schema,
|
||||
packbuf_t pack_buf_type )
|
||||
{
|
||||
cntl->impl_type = impl_type;
|
||||
cntl->var_num = var_num;
|
||||
cntl->bmid_m = bmid_m;
|
||||
cntl->bmid_n = bmid_n;
|
||||
cntl->does_invert_diag = does_invert_diag;
|
||||
cntl->rev_iter_if_upper = rev_iter_if_upper;
|
||||
cntl->rev_iter_if_lower = rev_iter_if_lower;
|
||||
cntl->pack_schema = pack_schema;
|
||||
cntl->pack_buf_type = pack_buf_type;
|
||||
}
|
||||
|
||||
|
||||
@@ -32,56 +32,65 @@
|
||||
|
||||
*/
|
||||
|
||||
struct packm_s
|
||||
struct packm_params_s
|
||||
{
|
||||
impl_t impl_type;
|
||||
varnum_t var_num;
|
||||
bszid_t bmid_m;
|
||||
bszid_t bmid_n;
|
||||
bool_t does_invert_diag;
|
||||
bool_t rev_iter_if_upper;
|
||||
bool_t rev_iter_if_lower;
|
||||
pack_t pack_schema;
|
||||
packbuf_t pack_buf_type;
|
||||
uint64_t size; // size field must be present and come first.
|
||||
packm_voft var_func;
|
||||
bszid_t bmid_m;
|
||||
bszid_t bmid_n;
|
||||
bool_t does_invert_diag;
|
||||
bool_t rev_iter_if_upper;
|
||||
bool_t rev_iter_if_lower;
|
||||
pack_t pack_schema;
|
||||
packbuf_t pack_buf_type;
|
||||
};
|
||||
typedef struct packm_s packm_t;
|
||||
typedef struct packm_params_s packm_params_t;
|
||||
|
||||
#define cntl_bmid_m( cntl ) cntl->bmid_m
|
||||
#define cntl_bmid_n( cntl ) cntl->bmid_n
|
||||
#define bli_cntl_packm_params_var_func( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->var_func )
|
||||
|
||||
#define cntl_does_invert_diag( cntl ) cntl->does_invert_diag
|
||||
#define cntl_rev_iter_if_upper( cntl ) cntl->rev_iter_if_upper
|
||||
#define cntl_rev_iter_if_lower( cntl ) cntl->rev_iter_if_lower
|
||||
#define cntl_pack_schema( cntl ) cntl->pack_schema
|
||||
#define cntl_pack_buf_type( cntl ) cntl->pack_buf_type
|
||||
#define bli_cntl_packm_params_bmid_m( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->bmid_m )
|
||||
|
||||
#define bli_cntl_sub_packm( cntl ) cntl->sub_packm
|
||||
#define bli_cntl_sub_packm_a( cntl ) cntl->sub_packm_a
|
||||
#define bli_cntl_sub_packm_a11( cntl ) cntl->sub_packm_a11
|
||||
#define bli_cntl_sub_packm_b( cntl ) cntl->sub_packm_b
|
||||
#define bli_cntl_sub_packm_b11( cntl ) cntl->sub_packm_b11
|
||||
#define bli_cntl_sub_packm_c( cntl ) cntl->sub_packm_c
|
||||
#define bli_cntl_sub_packm_c11( cntl ) cntl->sub_packm_c11
|
||||
#define bli_cntl_packm_params_bmid_n( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->bmid_n )
|
||||
|
||||
void bli_packm_cntl_init( void );
|
||||
void bli_packm_cntl_finalize( void );
|
||||
packm_t* bli_packm_cntl_obj_create( impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
bszid_t bmid_m,
|
||||
bszid_t bmid_n,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
bool_t rev_iter_if_lower,
|
||||
pack_t pack_schema,
|
||||
packbuf_t pack_buf_type );
|
||||
void bli_packm_cntl_obj_init( packm_t* cntl,
|
||||
impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
bszid_t bmid_m,
|
||||
bszid_t bmid_n,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
bool_t rev_iter_if_lower,
|
||||
pack_t pack_schema,
|
||||
packbuf_t pack_buf_type );
|
||||
#define bli_cntl_packm_params_does_invert_diag( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->does_invert_diag )
|
||||
|
||||
#define bli_cntl_packm_params_rev_iter_if_upper( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->rev_iter_if_upper )
|
||||
|
||||
#define bli_cntl_packm_params_rev_iter_if_lower( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->rev_iter_if_lower )
|
||||
|
||||
#define bli_cntl_packm_params_pack_schema( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->pack_schema )
|
||||
|
||||
#define bli_cntl_packm_params_pack_buf_type( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->pack_buf_type )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Creates a control tree node for a packm operation.
//
// Per the definition in this change, the function allocates a packm_params_t
// with bli_malloc_intl(), records its own size in the leading size field,
// and stores packm_var_func, bmid_m, bmid_n, does_invert_diag,
// rev_iter_if_upper, rev_iter_if_lower, pack_schema and pack_buf_type in it.
// It then returns the node built by bli_cntl_obj_create() from BLIS_NO_PART
// (no blocksize partitioning), var_func, the params struct and sub_node.
//
// NOTE(review): var_func and packm_var_func are passed as untyped void*;
// packm_var_func is stored into a packm_voft field, so callers presumably
// must supply a function matching that signature -- confirm at call sites.
cntl_t* bli_packm_cntl_obj_create
|
||||
(
|
||||
void* var_func,
|
||||
void* packm_var_func,
|
||||
bszid_t bmid_m,
|
||||
bszid_t bmid_n,
|
||||
bool_t does_invert_diag,
|
||||
bool_t rev_iter_if_upper,
|
||||
bool_t rev_iter_if_lower,
|
||||
pack_t pack_schema,
|
||||
packbuf_t pack_buf_type,
|
||||
cntl_t* sub_node
|
||||
);
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ void bli_packm_cntx_init( cntx_t* cntx )
|
||||
bli_gks_cntx_set_l1v_ker( BLIS_SETV_KER, cntx );
|
||||
|
||||
// Initialize the context with the global membrk object.
|
||||
bli_cntx_set_membrk( bli_mem_global_membrk(), cntx );
|
||||
bli_cntx_set_membrk( bli_memsys_global_membrk(), cntx );
|
||||
}
|
||||
|
||||
void bli_packm_cntx_finalize( cntx_t* cntx )
|
||||
|
||||
@@ -35,38 +35,43 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_packm_init( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
packm_t* cntl )
|
||||
siz_t bli_packm_init
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl
|
||||
)
|
||||
{
|
||||
// The purpose of packm_init() is to initialize an object P so that
|
||||
// a source object A can be packed into P via one of the packm
|
||||
// implementations. This initialization includes acquiring a suitable
|
||||
// block of memory from the memory allocator, if such a block of memory
|
||||
// has not already been allocated previously.
|
||||
// implementations. This initialization precedes the acquisition of a
|
||||
// suitable block of memory from the memory allocator (if such a block
|
||||
// of memory has not already been allocated previously).
|
||||
|
||||
invdiag_t invert_diag;
|
||||
pack_t schema;
|
||||
packord_t pack_ord_if_up;
|
||||
packord_t pack_ord_if_lo;
|
||||
packbuf_t pack_buf_type;
|
||||
bszid_t bmult_id_m;
|
||||
bszid_t bmult_id_n;
|
||||
obj_t c;
|
||||
bool_t does_invert_diag;
|
||||
bool_t rev_iter_if_upper;
|
||||
bool_t rev_iter_if_lower;
|
||||
//pack_t pack_schema;
|
||||
packbuf_t pack_buf_type;
|
||||
siz_t size_needed;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_packm_init_check( a, p, cntx );
|
||||
|
||||
// First check if we are to skip this operation because the control tree
|
||||
// is NULL, and if so, simply alias the object to its packed counterpart.
|
||||
if ( bli_cntl_is_noop( cntl ) )
|
||||
{
|
||||
bli_obj_alias_to( *a, *p );
|
||||
return;
|
||||
}
|
||||
// Extract various fields from the control tree.
|
||||
bmult_id_m = bli_cntl_packm_params_bmid_m( cntl );
|
||||
bmult_id_n = bli_cntl_packm_params_bmid_n( cntl );
|
||||
does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl );
|
||||
rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl );
|
||||
rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl );
|
||||
//pack_schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
|
||||
|
||||
#if 0
|
||||
// Let us now check to see if the object has already been packed. First
|
||||
// we check if it has been packed to an unspecified (row or column)
|
||||
// format, in which case we can alias the object and return.
|
||||
@@ -79,179 +84,150 @@ void bli_packm_init( obj_t* a,
|
||||
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
|
||||
{
|
||||
bli_obj_alias_to( *a, *p );
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// At this point, we can be assured that cntl is not NULL. Now we check
|
||||
// if the object has already been packed to the desired schema (as en-
|
||||
// coded in the control tree). If so, we can alias and return, as above.
|
||||
// Now we check if the object has already been packed to the desired
|
||||
// schema (as encoded in the control tree). If so, we can alias and
|
||||
// return 0.
|
||||
// NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED
|
||||
// and thus packing will be called for (but in some cases packing has
|
||||
// already taken place, or does not need to take place, and so that will
|
||||
// be indicated by the pack status). Also, not all combinations of
|
||||
// current pack status and desired pack schema are valid.
|
||||
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
|
||||
if ( bli_obj_pack_schema( *a ) == pack_schema )
|
||||
{
|
||||
bli_obj_alias_to( *a, *p );
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// If the object is marked as being filled with zeros, then we can skip
|
||||
// the packm operation entirely and alias. Notice that we use pack-aware
|
||||
// aliasing. This is needed because the object may have been packed in
|
||||
// a previous iteration, which means the object currently contains the
|
||||
// mem_t entry of an already-allocated block. bli_obj_alias_for_packing()
|
||||
// will avoid overwriting that mem_t entry, which means it can be
|
||||
// properly released later on.
|
||||
// the packm operation entirely and alias.
|
||||
if ( bli_obj_is_zeros( *a ) )
|
||||
{
|
||||
bli_obj_alias_for_packing( *a, *p );
|
||||
return;
|
||||
bli_obj_alias_to( *a, *p );
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Now, if we are not skipping the pack operation, then the only question
|
||||
// left is whether we are to typecast matrix a before packing.
|
||||
if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
|
||||
bli_abort();
|
||||
/*
|
||||
{
|
||||
// Initialize an object c for the intermediate typecast matrix.
|
||||
bli_packm_init_cast( a,
|
||||
p,
|
||||
&c );
|
||||
|
||||
// Copy/typecast matrix a to matrix c.
|
||||
bli_copym( a,
|
||||
&c );
|
||||
}
|
||||
else
|
||||
*/
|
||||
{
|
||||
// If no cast is needed, then aliasing object c to the original
|
||||
// matrix serves as a minor optimization. This causes the packm
|
||||
// implementation to pack directly from matrix a.
|
||||
bli_obj_alias_to( *a, c );
|
||||
}
|
||||
|
||||
|
||||
// Extract various fields from the control tree.
|
||||
pack_buf_type = cntl_pack_buf_type( cntl );
|
||||
bmult_id_m = cntl_bmid_m( cntl );
|
||||
bmult_id_n = cntl_bmid_n( cntl );
|
||||
|
||||
// Extract the schema from the context, depending on whether we are
|
||||
// We now ignore the pack_schema field in the control tree and
|
||||
// extract the schema from the context, depending on whether we are
|
||||
// preparing to pack a block of A or panel of B. For A and B, we must
|
||||
// obtain the schema from the context since the induced methods reuse
|
||||
// the same control trees used by native execution, and those induced
|
||||
// methods specify the schema used by the current execution phase
|
||||
// within the context (whereas the control tree does not change).
|
||||
pack_t schema;
|
||||
|
||||
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
|
||||
{
|
||||
schema = bli_cntx_get_pack_schema_a( cntx );
|
||||
//printf( "bli_packm_init: pack schema a = %x\n", schema );
|
||||
}
|
||||
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
|
||||
{
|
||||
schema = bli_cntx_get_pack_schema_b( cntx );
|
||||
//printf( "bli_packm_init: pack schema b = %x\n", schema );
|
||||
}
|
||||
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
|
||||
{
|
||||
// If we get a request to pack C for some reason, it is likely
|
||||
// not part of an induced method, and so it would be safe (and
|
||||
// necessary) to read the pack schema from the control tree.
|
||||
schema = cntl_pack_schema( cntl );
|
||||
//printf( "bli_packm_init: pack schema c = %x\n", schema );
|
||||
schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||
}
|
||||
|
||||
// Prepare a few other variables based on properties of the control
|
||||
// tree.
|
||||
|
||||
if ( cntl_does_invert_diag( cntl ) ) invert_diag = BLIS_INVERT_DIAG;
|
||||
else invert_diag = BLIS_NO_INVERT_DIAG;
|
||||
invdiag_t invert_diag;
|
||||
packord_t pack_ord_if_up;
|
||||
packord_t pack_ord_if_lo;
|
||||
|
||||
if ( cntl_rev_iter_if_upper( cntl ) ) pack_ord_if_up = BLIS_PACK_REV_IF_UPPER;
|
||||
else pack_ord_if_up = BLIS_PACK_FWD_IF_UPPER;
|
||||
if ( does_invert_diag ) invert_diag = BLIS_INVERT_DIAG;
|
||||
else invert_diag = BLIS_NO_INVERT_DIAG;
|
||||
|
||||
if ( cntl_rev_iter_if_lower( cntl ) ) pack_ord_if_lo = BLIS_PACK_REV_IF_LOWER;
|
||||
else pack_ord_if_lo = BLIS_PACK_FWD_IF_LOWER;
|
||||
if ( rev_iter_if_upper ) pack_ord_if_up = BLIS_PACK_REV_IF_UPPER;
|
||||
else pack_ord_if_up = BLIS_PACK_FWD_IF_UPPER;
|
||||
|
||||
if ( rev_iter_if_lower ) pack_ord_if_lo = BLIS_PACK_REV_IF_LOWER;
|
||||
else pack_ord_if_lo = BLIS_PACK_FWD_IF_LOWER;
|
||||
|
||||
// Initialize object p for the final packed matrix.
|
||||
bli_packm_init_pack( invert_diag,
|
||||
schema,
|
||||
pack_ord_if_up,
|
||||
pack_ord_if_lo,
|
||||
pack_buf_type,
|
||||
bmult_id_m,
|
||||
bmult_id_n,
|
||||
&c,
|
||||
p,
|
||||
cntx );
|
||||
size_needed
|
||||
=
|
||||
bli_packm_init_pack
|
||||
(
|
||||
invert_diag,
|
||||
schema,
|
||||
pack_ord_if_up,
|
||||
pack_ord_if_lo,
|
||||
bmult_id_m,
|
||||
bmult_id_n,
|
||||
a,
|
||||
p,
|
||||
cntx
|
||||
);
|
||||
|
||||
// Now p is ready to be packed.
|
||||
// Return the size needed for memory allocation of the packed buffer.
|
||||
return size_needed;
|
||||
}
|
||||
|
||||
|
||||
void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
pack_t schema,
|
||||
packord_t pack_ord_if_up,
|
||||
packord_t pack_ord_if_lo,
|
||||
packbuf_t pack_buf_type,
|
||||
bszid_t bmult_id_m,
|
||||
bszid_t bmult_id_n,
|
||||
obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx )
|
||||
siz_t bli_packm_init_pack
|
||||
(
|
||||
invdiag_t invert_diag,
|
||||
pack_t schema,
|
||||
packord_t pack_ord_if_up,
|
||||
packord_t pack_ord_if_lo,
|
||||
bszid_t bmult_id_m,
|
||||
bszid_t bmult_id_n,
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx
|
||||
)
|
||||
{
|
||||
num_t dt = bli_obj_datatype( *c );
|
||||
trans_t transc = bli_obj_onlytrans_status( *c );
|
||||
dim_t m_c = bli_obj_length( *c );
|
||||
dim_t n_c = bli_obj_width( *c );
|
||||
num_t dt = bli_obj_datatype( *a );
|
||||
trans_t transa = bli_obj_onlytrans_status( *a );
|
||||
dim_t m_a = bli_obj_length( *a );
|
||||
dim_t n_a = bli_obj_width( *a );
|
||||
dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx );
|
||||
dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx );
|
||||
dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx );
|
||||
dim_t bmult_n_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_n, cntx );
|
||||
|
||||
membrk_t* membrk = bli_cntx_get_membrk( cntx );
|
||||
|
||||
mem_t* mem_p;
|
||||
dim_t m_p, n_p;
|
||||
dim_t m_p_pad, n_p_pad;
|
||||
siz_t size_p;
|
||||
siz_t elem_size_p;
|
||||
inc_t rs_p, cs_p;
|
||||
inc_t is_p;
|
||||
void* buf;
|
||||
|
||||
|
||||
// We begin by copying the basic fields of c. We do NOT copy the
|
||||
// pack_mem entry from c because the entry in p may be cached from
|
||||
// a previous iteration, and thus we don't want to overwrite it.
|
||||
bli_obj_alias_for_packing( *c, *p );
|
||||
// We begin by copying the fields of A.
|
||||
bli_obj_alias_to( *a, *p );
|
||||
|
||||
// Update the dimension fields to explicitly reflect a transposition,
|
||||
// if needed.
|
||||
// Then, clear the conjugation and transposition fields from the object
|
||||
// since matrix packing in BLIS is deemed to take care of all conjugation
|
||||
// and transposition necessary.
|
||||
// Then, we adjust the properties of p when c needs a transposition.
|
||||
// We negate the diagonal offset, and if c is upper- or lower-stored,
|
||||
// we either toggle the uplo of p.
|
||||
// Finally, if we mark p as dense since we assume that all matrices,
|
||||
// Then, we adjust the properties of P when A needs a transposition.
|
||||
// We negate the diagonal offset, and if A is upper- or lower-stored,
|
||||
// we toggle the uplo of P.
|
||||
// Finally, we mark P as dense since we assume that all matrices,
|
||||
// regardless of structure, will be densified.
|
||||
bli_obj_set_dims_with_trans( transc, m_c, n_c, *p );
|
||||
bli_obj_set_dims_with_trans( transa, m_a, n_a, *p );
|
||||
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p );
|
||||
if ( bli_does_trans( transc ) )
|
||||
if ( bli_does_trans( transa ) )
|
||||
{
|
||||
bli_obj_negate_diag_offset( *p );
|
||||
if ( bli_obj_is_upper_or_lower( *c ) )
|
||||
if ( bli_obj_is_upper_or_lower( *a ) )
|
||||
bli_obj_toggle_uplo( *p );
|
||||
}
|
||||
|
||||
// If we are packing micro-panels, mark p as dense. Otherwise, we are
|
||||
// If we are packing micro-panels, mark P as dense. Otherwise, we are
|
||||
// probably being called in the context of a level-2 operation, in
|
||||
// which case we do not want to overwrite the uplo field of p (inherited
|
||||
// from c) with BLIS_DENSE because that information may be needed by
|
||||
// which case we do not want to overwrite the uplo field of P (inherited
|
||||
// from A) with BLIS_DENSE because that information may be needed by
|
||||
// the level-2 operation's unblocked variant to decide whether to
|
||||
// execute a "lower" or "upper" branch of code.
|
||||
if ( bli_is_panel_packed( schema ) )
|
||||
@@ -265,7 +241,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
// Set the invert diagonal field.
|
||||
bli_obj_set_invert_diag( invert_diag, *p );
|
||||
|
||||
// Set the pack status of p to the pack schema prescribed in the control
|
||||
// Set the pack status of P to the pack schema prescribed in the control
|
||||
// tree node.
|
||||
bli_obj_set_pack_schema( schema, *p );
|
||||
|
||||
@@ -273,15 +249,11 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
bli_obj_set_pack_order_if_upper( pack_ord_if_up, *p );
|
||||
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, *p );
|
||||
|
||||
// Extract the address of the mem_t object within p that will track
|
||||
// properties of the packed buffer.
|
||||
mem_p = bli_obj_pack_mem( *p );
|
||||
|
||||
// Compute the dimensions padded by the dimension multiples. These
|
||||
// dimensions will be the dimensions of the packed matrices, including
|
||||
// zero-padding, and will be used by the macro- and micro-kernels.
|
||||
// We compute them by starting with the effective dimensions of c (now
|
||||
// in p) and aligning them to the dimension multiples (typically equal
|
||||
// We compute them by starting with the effective dimensions of A (now
|
||||
// in P) and aligning them to the dimension multiples (typically equal
|
||||
// to register blocksizes). This does waste a little bit of space for
|
||||
// level-2 operations, but that's okay with us.
|
||||
m_p = bli_obj_length( *p );
|
||||
@@ -295,9 +267,9 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
bli_obj_set_padded_dims( m_p_pad, n_p_pad, *p );
|
||||
|
||||
// Now we prepare to compute strides, align them, and compute the
|
||||
// total number of bytes needed for the packed buffer. After that,
|
||||
// we will acquire an appropriate block of memory from the memory
|
||||
// allocator.
|
||||
// total number of bytes needed for the packed buffer. The caller
|
||||
// will then use that value to acquire an appropriate block of memory
|
||||
// from the memory allocator.
|
||||
|
||||
// Extract the element size for the packed object.
|
||||
elem_size_p = bli_obj_elem_size( *p );
|
||||
@@ -320,7 +292,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
rs_p = bli_align_dim_to_size( rs_p, elem_size_p,
|
||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
||||
|
||||
// Store the strides in p.
|
||||
// Store the strides in P.
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
|
||||
// Compute the size of the packed buffer.
|
||||
@@ -343,7 +315,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
cs_p = bli_align_dim_to_size( cs_p, elem_size_p,
|
||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
||||
|
||||
// Store the strides in p.
|
||||
// Store the strides in P.
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
|
||||
// Compute the size of the packed buffer.
|
||||
@@ -431,7 +403,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( m_p_pad / m_panel );
|
||||
else is_p = 1;
|
||||
|
||||
// Store the strides and panel dimension in p.
|
||||
// Store the strides and panel dimension in P.
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
bli_obj_set_imag_stride( is_p, *p );
|
||||
bli_obj_set_panel_dim( m_panel, *p );
|
||||
@@ -524,7 +496,7 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( n_p_pad / n_panel );
|
||||
else is_p = 1;
|
||||
|
||||
// Store the strides and panel dimension in p.
|
||||
// Store the strides and panel dimension in P.
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
bli_obj_set_imag_stride( is_p, *p );
|
||||
bli_obj_set_panel_dim( n_panel, *p );
|
||||
@@ -547,99 +519,6 @@ void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
size_p = 0;
|
||||
}
|
||||
|
||||
|
||||
if ( bli_mem_is_unalloc( mem_p ) )
|
||||
{
|
||||
// If the mem_t object of p has not yet been allocated, then acquire
|
||||
// a memory block of type pack_buf_type.
|
||||
bli_membrk_acquire_m( membrk,
|
||||
size_p,
|
||||
pack_buf_type,
|
||||
mem_p );
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the mem_t object is currently allocated and smaller than is
|
||||
// needed, then it must have been allocated for a different type
|
||||
// of object (a different pack_buf_type value), so we must first
|
||||
// release it and then re-acquire it using the new size and new
|
||||
// pack_buf_type value.
|
||||
if ( bli_mem_size( mem_p ) < size_p )
|
||||
{
|
||||
bli_membrk_release( mem_p );
|
||||
bli_membrk_acquire_m( membrk,
|
||||
size_p,
|
||||
pack_buf_type,
|
||||
mem_p );
|
||||
}
|
||||
}
|
||||
|
||||
// Grab the buffer address from the mem_t object and copy it to the
|
||||
// main object buffer field. (Sometimes this buffer address will be
|
||||
// copied when the value is already up-to-date, because it persists
|
||||
// in the main object buffer field across loop iterations.)
|
||||
buf = bli_mem_buffer( mem_p );
|
||||
bli_obj_set_buffer( buf, *p );
|
||||
|
||||
return size_p;
|
||||
}
|
||||
|
||||
void bli_packm_release( obj_t* p,
|
||||
packm_t* cntl )
|
||||
{
|
||||
if ( !bli_cntl_is_noop( cntl ) )
|
||||
bli_obj_release_pack( p );
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
void bli_packm_init_cast( obj_t* a,
|
||||
obj_t* p,
|
||||
obj_t* c )
|
||||
{
|
||||
// The idea here is that we want to create an object c that is identical
|
||||
// to object a, except that:
|
||||
// (1) the storage datatype of c is equal to the target datatype of a,
|
||||
// with the element size of c adjusted accordingly,
|
||||
// (2) the view offset of c is reset to (0,0),
|
||||
// (3) object c's main buffer is set to a new memory region acquired
|
||||
// from the memory manager, or extracted from p if a mem entry is
|
||||
// already available, (After acquiring a mem entry from the memory
|
||||
// manager, it is cached within p for quick access later on.)
|
||||
// (4) object c is marked as being stored in a standard, contiguous
|
||||
// format (ie: a column-major order).
|
||||
// Any transposition encoded within object a will not be handled here,
|
||||
// but rather will be handled in the packm implementation. That way,
|
||||
// the only thing castm needs to do is cast.
|
||||
|
||||
num_t dt_targ_a = bli_obj_target_datatype( *a );
|
||||
dim_t m_a = bli_obj_length( *a );
|
||||
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
|
||||
inc_t rs_c, cs_c;
|
||||
|
||||
// We begin by copying the basic fields of a.
|
||||
bli_obj_alias_to( *a, *c );
|
||||
|
||||
// Update datatype and element size fields.
|
||||
bli_obj_set_datatype( dt_targ_a, *c );
|
||||
bli_obj_set_elem_size( elem_size_c, *c );
|
||||
|
||||
// Reset the view offsets to (0,0).
|
||||
bli_obj_set_offs( 0, 0, *c );
|
||||
|
||||
// Check the mem_t entry of p associated with the cast buffer. If it is
|
||||
// NULL, then acquire memory sufficient to hold the object data and cache
|
||||
// it to p. (Otherwise, if it is non-NULL, then memory has already been
|
||||
// acquired from the memory manager and cached.) We then set the main
|
||||
// buffer of c to the cached address of the cast memory.
|
||||
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
|
||||
|
||||
// Update the strides. We set the increments to reflect column-major order
|
||||
// storage. We start the leading dimension out as m(a) and increment it if
|
||||
// necessary so that the beginning of each column is aligned.
|
||||
cs_c = bli_align_dim_to_size( m_a, elem_size_c,
|
||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
||||
rs_c = 1;
|
||||
bli_obj_set_strides( rs_c, cs_c, *c );
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
@@ -32,28 +32,24 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_init( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
packm_t* cntl );
|
||||
siz_t bli_packm_init
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl
|
||||
);
|
||||
|
||||
void bli_packm_init_pack( invdiag_t invert_diag,
|
||||
pack_t pack_schema,
|
||||
packord_t pack_ord_if_up,
|
||||
packord_t pack_ord_if_lo,
|
||||
packbuf_t pack_buf_type,
|
||||
bszid_t mr_id,
|
||||
bszid_t nr_id,
|
||||
obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx );
|
||||
|
||||
/*
|
||||
void bli_packm_init_cast( obj_t* a,
|
||||
obj_t* p,
|
||||
obj_t* c );
|
||||
*/
|
||||
|
||||
void bli_packm_release( obj_t* p,
|
||||
packm_t* cntl );
|
||||
siz_t bli_packm_init_pack
|
||||
(
|
||||
invdiag_t invert_diag,
|
||||
pack_t schema,
|
||||
packord_t pack_ord_if_up,
|
||||
packord_t pack_ord_if_lo,
|
||||
bszid_t bmult_id_m,
|
||||
bszid_t bmult_id_n,
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx
|
||||
);
|
||||
|
||||
|
||||
@@ -34,33 +34,16 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#define FUNCPTR_T packm_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
thrinfo_t* t );
|
||||
|
||||
static FUNCPTR_T vars[6][3] =
|
||||
void bli_packm_int
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
// unblocked optimized unblocked blocked
|
||||
{ bli_packm_unb_var1, NULL, bli_packm_blk_var1 },
|
||||
{ NULL, NULL, NULL, },
|
||||
{ NULL, NULL, NULL, },
|
||||
{ NULL, NULL, NULL, },
|
||||
{ NULL, NULL, NULL, },
|
||||
{ NULL, NULL, NULL, },
|
||||
};
|
||||
|
||||
void bli_packm_int( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
packm_t* cntl,
|
||||
thrinfo_t* thread )
|
||||
{
|
||||
varnum_t n;
|
||||
impl_t i;
|
||||
FUNCPTR_T f;
|
||||
packm_voft f;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
@@ -70,14 +53,6 @@ void bli_packm_int( obj_t* a,
|
||||
// it, then we should fold it into the next alias-and-early-exit block.
|
||||
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
|
||||
|
||||
// First check if we are to skip this operation because the control tree
|
||||
// is NULL. We return without taking any action because a was already
|
||||
// aliased to p in packm_init().
|
||||
if ( bli_cntl_is_noop( cntl ) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Let us now check to see if the object has already been packed. First
|
||||
// we check if it has been packed to an unspecified (row or column)
|
||||
// format, in which case we can return, since by now aliasing has already
|
||||
@@ -101,7 +76,7 @@ void bli_packm_int( obj_t* a,
|
||||
// already taken place, or does not need to take place, and so that will
|
||||
// be indicated by the pack status). Also, not all combinations of
|
||||
// current pack status and desired pack schema are valid.
|
||||
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
|
||||
if ( bli_obj_pack_schema( *a ) == bli_cntl_packm_params_pack_schema( cntl ) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -113,21 +88,17 @@ void bli_packm_int( obj_t* a,
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = bli_cntl_var_num( cntl );
|
||||
i = bli_cntl_impl_type( cntl );
|
||||
|
||||
// Index into the variant array to extract the correct function pointer.
|
||||
f = vars[n][i];
|
||||
// Extract the function pointer from the current control tree node.
|
||||
f = bli_cntl_packm_params_var_func( cntl );
|
||||
|
||||
// Invoke the variant with kappa_use.
|
||||
f( a,
|
||||
p,
|
||||
cntx,
|
||||
thread );
|
||||
|
||||
// Barrier so that packing is done before computation
|
||||
bli_thread_obarrier( thread );
|
||||
f
|
||||
(
|
||||
a,
|
||||
p,
|
||||
cntx,
|
||||
cntl,
|
||||
thread
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -32,9 +32,11 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_int( obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
packm_t* cntl,
|
||||
thrinfo_t* thread );
|
||||
|
||||
void bli_packm_int
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
@@ -41,7 +41,8 @@ thrinfo_t* bli_packm_thrinfo_create
|
||||
thrcomm_t* icomm,
|
||||
dim_t icomm_id,
|
||||
dim_t n_way,
|
||||
dim_t work_id
|
||||
dim_t work_id,
|
||||
thrinfo_t* sub_node
|
||||
)
|
||||
{
|
||||
thrinfo_t* thread = bli_malloc_intl( sizeof( thrinfo_t ) );
|
||||
@@ -53,9 +54,8 @@ thrinfo_t* bli_packm_thrinfo_create
|
||||
icomm, icomm_id,
|
||||
n_way,
|
||||
work_id,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
FALSE,
|
||||
sub_node
|
||||
);
|
||||
|
||||
return thread;
|
||||
@@ -69,7 +69,8 @@ void bli_packm_thrinfo_init
|
||||
thrcomm_t* icomm,
|
||||
dim_t icomm_id,
|
||||
dim_t n_way,
|
||||
dim_t work_id
|
||||
dim_t work_id,
|
||||
thrinfo_t* sub_node
|
||||
)
|
||||
{
|
||||
bli_thrinfo_init
|
||||
@@ -78,9 +79,8 @@ void bli_packm_thrinfo_init
|
||||
ocomm, ocomm_id,
|
||||
icomm, icomm_id,
|
||||
n_way, work_id,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
FALSE,
|
||||
sub_node
|
||||
);
|
||||
}
|
||||
|
||||
@@ -95,7 +95,8 @@ void bli_packm_thrinfo_init_single
|
||||
&BLIS_SINGLE_COMM, 0,
|
||||
&BLIS_SINGLE_COMM, 0,
|
||||
1,
|
||||
0
|
||||
0,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -49,7 +49,8 @@ thrinfo_t* bli_packm_thrinfo_create
|
||||
thrcomm_t* icomm,
|
||||
dim_t icomm_id,
|
||||
dim_t n_way,
|
||||
dim_t work_id
|
||||
dim_t work_id,
|
||||
thrinfo_t* sub_node
|
||||
);
|
||||
|
||||
void bli_packm_thrinfo_init
|
||||
@@ -60,7 +61,8 @@ void bli_packm_thrinfo_init
|
||||
thrcomm_t* icomm,
|
||||
dim_t icomm_id,
|
||||
dim_t n_way,
|
||||
dim_t work_id
|
||||
dim_t work_id,
|
||||
thrinfo_t* sub_node
|
||||
);
|
||||
|
||||
void bli_packm_thrinfo_init_single
|
||||
|
||||
@@ -55,10 +55,14 @@ typedef void (*FUNCPTR_T)(
|
||||
static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1);
|
||||
|
||||
|
||||
void bli_packm_unb_var1( obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
thrinfo_t* thread )
|
||||
void bli_packm_unb_var1
|
||||
(
|
||||
obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
num_t dt_cp = bli_obj_datatype( *c );
|
||||
|
||||
|
||||
@@ -32,10 +32,14 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_unb_var1( obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
thrinfo_t* thread );
|
||||
void bli_packm_unb_var1
|
||||
(
|
||||
obj_t* c,
|
||||
obj_t* p,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
|
||||
@@ -32,6 +32,5 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_trsm_cntx_init( void );
|
||||
void bli_trsm_cntx_finalize( void );
|
||||
#include "bli_scalm_cntl.h"
|
||||
|
||||
@@ -34,38 +34,25 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
scalm_t* scalm_cntl = NULL;
|
||||
|
||||
void bli_scalm_cntl_init()
|
||||
cntl_t* bli_scalm_cntl_obj_create
|
||||
(
|
||||
void* var_func,
|
||||
cntl_t* sub_node
|
||||
)
|
||||
{
|
||||
scalm_cntl = bli_scalm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1 );
|
||||
}
|
||||
cntl_t* cntl;
|
||||
|
||||
void bli_scalm_cntl_finalize()
|
||||
{
|
||||
bli_cntl_obj_free( scalm_cntl );
|
||||
}
|
||||
|
||||
|
||||
scalm_t* bli_scalm_cntl_obj_create( impl_t impl_type,
|
||||
varnum_t var_num )
|
||||
{
|
||||
scalm_t* cntl;
|
||||
|
||||
cntl = ( scalm_t* ) bli_malloc_intl( sizeof(scalm_t) );
|
||||
|
||||
cntl->impl_type = impl_type;
|
||||
cntl->var_num = var_num;
|
||||
// It's important that we set the bszid field to BLIS_NO_PART to indicate
|
||||
// that no blocksize partitioning is performed. bli_cntl_free() will rely
|
||||
// on this information to know how to step through the thrinfo_t tree in
|
||||
// sync with the cntl_t tree.
|
||||
cntl = bli_cntl_obj_create
|
||||
(
|
||||
BLIS_NO_PART,
|
||||
var_func,
|
||||
NULL,
|
||||
sub_node
|
||||
);
|
||||
|
||||
return cntl;
|
||||
}
|
||||
|
||||
void bli_scalm_cntl_obj_init( scalm_t* cntl,
|
||||
impl_t impl_type,
|
||||
varnum_t var_num )
|
||||
{
|
||||
cntl->impl_type = impl_type;
|
||||
cntl->var_num = var_num;
|
||||
}
|
||||
|
||||
|
||||
@@ -32,20 +32,9 @@
|
||||
|
||||
*/
|
||||
|
||||
struct scalm_s
|
||||
{
|
||||
impl_t impl_type;
|
||||
varnum_t var_num;
|
||||
};
|
||||
typedef struct scalm_s scalm_t;
|
||||
|
||||
#define bli_cntl_sub_scalm( cntl ) cntl->sub_scalm
|
||||
|
||||
void bli_scalm_cntl_init( void );
|
||||
void bli_scalm_cntl_finalize( void );
|
||||
scalm_t* bli_scalm_cntl_obj_create( impl_t impl_type,
|
||||
varnum_t var_num );
|
||||
void bli_scalm_cntl_obj_init( scalm_t* cntl,
|
||||
impl_t impl_type,
|
||||
varnum_t var_num );
|
||||
|
||||
cntl_t* bli_scalm_cntl_obj_create
|
||||
(
|
||||
void* var_func,
|
||||
cntl_t* sub_node
|
||||
);
|
||||
|
||||
@@ -37,8 +37,7 @@
|
||||
#include "bli_unpackm_int.h"
|
||||
|
||||
#include "bli_unpackm_unb_var1.h"
|
||||
//#include "bli_unpackm_blk_var1.h"
|
||||
|
||||
#include "bli_unpackm_blk_var2.h"
|
||||
#include "bli_unpackm_blk_var1.h"
|
||||
|
||||
#include "bli_unpackm_cxk.h"
|
||||
|
||||
@@ -52,13 +52,17 @@ typedef void (*FUNCPTR_T)(
|
||||
cntx_t* cntx
|
||||
);
|
||||
|
||||
static FUNCPTR_T GENARRAY(ftypes,unpackm_blk_var2);
|
||||
static FUNCPTR_T GENARRAY(ftypes,unpackm_blk_var1);
|
||||
|
||||
|
||||
void bli_unpackm_blk_var2( obj_t* p,
|
||||
obj_t* c,
|
||||
cntx_t* cntx,
|
||||
unpackm_t* cntl )
|
||||
void bli_unpackm_blk_var1
|
||||
(
|
||||
obj_t* p,
|
||||
obj_t* c,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
num_t dt_cp = bli_obj_datatype( *c );
|
||||
|
||||
@@ -266,5 +270,5 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( unpackm_blk_var2 )
|
||||
INSERT_GENTFUNC_BASIC0( unpackm_blk_var1 )
|
||||
|
||||
@@ -32,14 +32,35 @@
|
||||
|
||||
*/
|
||||
|
||||
#define bli_thrinfo_sub_self( thread ) thread->sub_l3op
|
||||
#define bli_thrinfo_sub_opackm( thread ) thread->opackm
|
||||
#define bli_thrinfo_sub_ipackm( thread ) thread->ipackm
|
||||
void bli_unpackm_blk_var1
|
||||
(
|
||||
obj_t* p,
|
||||
obj_t* c,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
#define trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
|
||||
//thrinfo_t** bli_trmm_thrinfo_create_paths( bool_t jc_dependency );
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
struc_t strucc, \
|
||||
doff_t diagoffc, \
|
||||
diag_t diagc, \
|
||||
uplo_t uploc, \
|
||||
trans_t transc, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
dim_t m_panel, \
|
||||
dim_t n_panel, \
|
||||
void* p, inc_t rs_p, inc_t cs_p, \
|
||||
dim_t pd_p, inc_t ps_p, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( unpackm_blk_var1 )
|
||||
|
||||
@@ -34,10 +34,12 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_unpackm_check( obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx,
|
||||
unpackm_t* cntl )
|
||||
void bli_unpackm_int_check
|
||||
(
|
||||
obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
|
||||
@@ -32,7 +32,10 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_unpackm_check( obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx,
|
||||
unpackm_t* cntl );
|
||||
void bli_unpackm_int_check
|
||||
(
|
||||
obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx
|
||||
);
|
||||
|
||||
|
||||
@@ -34,42 +34,35 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
unpackm_t* unpackm_cntl = NULL;
|
||||
|
||||
void bli_unpackm_cntl_init()
|
||||
cntl_t* bli_unpackm_cntl_obj_create
|
||||
(
|
||||
void* var_func,
|
||||
void* unpackm_var_func,
|
||||
cntl_t* sub_node
|
||||
)
|
||||
{
|
||||
unpackm_cntl = bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
NULL ); // no blocksize needed
|
||||
}
|
||||
cntl_t* cntl;
|
||||
unpackm_params_t* params;
|
||||
|
||||
void bli_unpackm_cntl_finalize()
|
||||
{
|
||||
bli_cntl_obj_free( unpackm_cntl );
|
||||
}
|
||||
// Allocate an unpackm_params_t struct.
|
||||
params = bli_malloc_intl( sizeof( unpackm_params_t ) );
|
||||
|
||||
unpackm_t* bli_unpackm_cntl_obj_create( impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
blksz_t* b )
|
||||
{
|
||||
unpackm_t* cntl;
|
||||
// Initialize the unpackm_params_t struct.
|
||||
params->size = sizeof( unpackm_params_t );
|
||||
params->var_func = unpackm_var_func;
|
||||
|
||||
cntl = ( unpackm_t* ) bli_malloc_intl( sizeof(unpackm_t) );
|
||||
|
||||
cntl->impl_type = impl_type;
|
||||
cntl->var_num = var_num;
|
||||
cntl->b = b;
|
||||
// It's important that we set the bszid field to BLIS_NO_PART to indicate
|
||||
// that no blocksize partitioning is performed. bli_cntl_free() will rely
|
||||
// on this information to know how to step through the thrinfo_t tree in
|
||||
// sync with the cntl_t tree.
|
||||
cntl = bli_cntl_obj_create
|
||||
(
|
||||
BLIS_NO_PART,
|
||||
var_func,
|
||||
params,
|
||||
sub_node
|
||||
);
|
||||
|
||||
return cntl;
|
||||
}
|
||||
|
||||
void bli_unpackm_cntl_obj_init( unpackm_t* cntl,
|
||||
impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
blksz_t* b )
|
||||
{
|
||||
cntl->impl_type = impl_type;
|
||||
cntl->var_num = var_num;
|
||||
cntl->b = b;
|
||||
}
|
||||
|
||||
|
||||
@@ -32,28 +32,23 @@
|
||||
|
||||
*/
|
||||
|
||||
struct unpackm_s
|
||||
struct unpackm_params_s
|
||||
{
|
||||
impl_t impl_type;
|
||||
varnum_t var_num;
|
||||
blksz_t* b;
|
||||
uint64_t size; // size field must be present and come first.
|
||||
unpackm_voft var_func;
|
||||
};
|
||||
typedef struct unpackm_s unpackm_t;
|
||||
typedef struct unpackm_params_s unpackm_params_t;
|
||||
|
||||
#define bli_cntl_sub_unpackm( cntl ) cntl->sub_unpackm
|
||||
#define bli_cntl_sub_unpackm_a( cntl ) cntl->sub_unpackm_a
|
||||
#define bli_cntl_sub_unpackm_a11( cntl ) cntl->sub_unpackm_a11
|
||||
#define bli_cntl_sub_unpackm_b( cntl ) cntl->sub_unpackm_b
|
||||
#define bli_cntl_sub_unpackm_b11( cntl ) cntl->sub_unpackm_b11
|
||||
#define bli_cntl_sub_unpackm_c( cntl ) cntl->sub_unpackm_c
|
||||
#define bli_cntl_sub_unpackm_c11( cntl ) cntl->sub_unpackm_c11
|
||||
#define bli_cntl_unpackm_params_var_func( cntl ) \
|
||||
\
|
||||
( ( (unpackm_params_t*)(cntl)->params )->var_func )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_unpackm_cntl_obj_create
|
||||
(
|
||||
void* var_func,
|
||||
void* unpackm_var_func,
|
||||
cntl_t* sub_node
|
||||
);
|
||||
|
||||
void bli_unpackm_cntl_init( void );
|
||||
void bli_unpackm_cntl_finalize( void );
|
||||
unpackm_t* bli_unpackm_cntl_obj_create( impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
blksz_t* b );
|
||||
void bli_unpackm_cntl_obj_init( unpackm_t* cntl,
|
||||
impl_t impl_type,
|
||||
varnum_t var_num,
|
||||
blksz_t* b );
|
||||
|
||||
@@ -152,15 +152,16 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname)( \
|
||||
conj_t conjp, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* p, inc_t ldp, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjp, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* p, inc_t ldp, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
dim_t panel_dim; \
|
||||
num_t dt; \
|
||||
|
||||
@@ -34,188 +34,43 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#define FUNCPTR_T unpackm_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)( obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx,
|
||||
unpackm_t* cntl );
|
||||
|
||||
static FUNCPTR_T vars[2][3] =
|
||||
void bli_unpackm_int
|
||||
(
|
||||
obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
// unblocked optimized unblocked blocked
|
||||
{ bli_unpackm_unb_var1, NULL, NULL, },
|
||||
{ NULL, NULL, bli_unpackm_blk_var2, },
|
||||
};
|
||||
unpackm_voft f;
|
||||
|
||||
void bli_unpackm_int( obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx,
|
||||
unpackm_t* cntl,
|
||||
thrinfo_t* thread )
|
||||
{
|
||||
// The unpackm operation consists of an optional post-process: castm.
|
||||
// (This post-process is analogous to the castm pre-process in packm.)
|
||||
// Here are the following possible ways unpackm can execute:
|
||||
// 1. unpack and cast: Unpack to a temporary matrix c and then cast
|
||||
// c to a.
|
||||
// 2. unpack only: Unpack directly to matrix a since typecasting is
|
||||
// not needed.
|
||||
// 3. cast only: Not yet supported / not used.
|
||||
// 4. no-op: The control tree directs us to skip the unpack operation
|
||||
// entirely. No action is taken.
|
||||
|
||||
obj_t c;
|
||||
|
||||
varnum_t n;
|
||||
impl_t i;
|
||||
FUNCPTR_T f;
|
||||
|
||||
// Sanity check; A should never have a zero dimension. If we must support
|
||||
// it, then we should fold it into the next alias-and-early-exit block.
|
||||
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
|
||||
|
||||
// First check if we are to skip this operation because the control tree
|
||||
// is NULL, and if so, simply return.
|
||||
if ( bli_cntl_is_noop( cntl ) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_unpackm_int_check( p, a, cntx );
|
||||
|
||||
// If p was aliased to a during the pack stage (because it was already
|
||||
// in an acceptable packed/contiguous format), then no unpack is actually
|
||||
// necessary, so we return.
|
||||
if ( bli_obj_is_alias_of( *p, *a ) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
if ( bli_obj_is_alias_of( *p, *a ) ) return;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_unpackm_check( p, a, cntx, cntl );
|
||||
|
||||
// Now, if we are not skipping the unpack operation, then the only
|
||||
// question left is whether we are to typecast matrix a after unpacking.
|
||||
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
|
||||
bli_abort();
|
||||
/*
|
||||
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
|
||||
{
|
||||
// Initialize an object c for the intermediate typecast matrix.
|
||||
bli_unpackm_init_cast( p,
|
||||
a,
|
||||
&c );
|
||||
}
|
||||
else
|
||||
*/
|
||||
{
|
||||
// If no cast is needed, then aliasing object c to the original
|
||||
// matrix serves as a minor optimization. This causes the unpackm
|
||||
// implementation to unpack directly into matrix a.
|
||||
bli_obj_alias_to( *a, c );
|
||||
}
|
||||
|
||||
// Now we are ready to proceed with the unpacking.
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = bli_cntl_var_num( cntl );
|
||||
i = bli_cntl_impl_type( cntl );
|
||||
|
||||
// Index into the variant array to extract the correct function pointer.
|
||||
f = vars[n][i];
|
||||
// Extract the function pointer from the current control tree node.
|
||||
f = bli_cntl_unpackm_params_var_func( cntl );
|
||||
|
||||
// Invoke the variant.
|
||||
if( bli_thread_am_ochief( thread ) ) {
|
||||
f( p,
|
||||
&c,
|
||||
cntx,
|
||||
cntl );
|
||||
}
|
||||
bli_thread_obarrier( thread );
|
||||
|
||||
// Now, if necessary, we cast the contents of c to matrix a. If casting
|
||||
// was not necessary, then we are done because the call to the unpackm
|
||||
// implementation would have unpacked directly to matrix a.
|
||||
/*
|
||||
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
|
||||
if ( bli_thread_am_ochief( thread ) )
|
||||
{
|
||||
// Copy/typecast matrix c to matrix a.
|
||||
// NOTE: Here, we use copynzm instead of copym because, in the cases
|
||||
// where we are unpacking/typecasting a real matrix c to a complex
|
||||
// matrix a, we want to touch only the real components of a, rather
|
||||
// than also set the imaginary components to zero. This comes about
|
||||
// because of the fact that, if we are unpacking real-to-complex,
|
||||
// then it is because all of the computation occurred in the real
|
||||
// domain, and so we would want to leave whatever imaginary values
|
||||
// there are in matrix a untouched. Notice that for unpackings that
|
||||
// entail complex-to-complex data movements, the copynzm operation
|
||||
// behaves exactly as copym, so no use cases are lost (at least none
|
||||
// that I can think of).
|
||||
bli_copynzm( &c,
|
||||
a );
|
||||
f
|
||||
(
|
||||
p,
|
||||
a,
|
||||
cntx,
|
||||
cntl,
|
||||
thread
|
||||
);
|
||||
}
|
||||
|
||||
// NOTE: The above code/comment is outdated. What should happen is
|
||||
// as follows:
|
||||
// - If dt(a) is complex and dt(p) is real, then create an alias of
|
||||
// a and then tweak it so that it looks like a real domain object.
|
||||
// This will involve:
|
||||
// - projecting the datatype to real domain
|
||||
// - scaling both the row and column strides by 2
|
||||
// ALL OF THIS should be done in the front-end, NOT here, as
|
||||
// unpackm() won't even be needed in that case.
|
||||
}
|
||||
*/
|
||||
// Barrier so that unpacking is done before computation.
|
||||
bli_thread_obarrier( thread );
|
||||
}
|
||||
|
||||
/*
|
||||
void bli_unpackm_init_cast( obj_t* p,
|
||||
obj_t* a,
|
||||
obj_t* c )
|
||||
{
|
||||
// The idea here is that we want to create an object c that is identical
|
||||
// to object a, except that:
|
||||
// (1) the storage datatype of c is equal to the target datatype of a,
|
||||
// with the element size of c adjusted accordingly,
|
||||
// (2) the view offset of c is reset to (0,0),
|
||||
// (3) object c's main buffer is set to a new memory region acquired
|
||||
// from the memory manager, or extracted from p if a mem entry is
|
||||
// already available, (After acquiring a mem entry from the memory
|
||||
// manager, it is cached within p for quick access later on.)
|
||||
// (4) object c is marked as being stored in a standard, contiguous
|
||||
// format (ie: column-major order).
|
||||
// Any transposition encoded within object a will also be encoded in
|
||||
// object c. That way, unpackm handles any needed transposition during
|
||||
// the unpacking, and the only thing the cast stage needs to do is cast.
|
||||
|
||||
num_t dt_targ_a = bli_obj_target_datatype( *a );
|
||||
dim_t m_a = bli_obj_length( *a );
|
||||
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
|
||||
|
||||
inc_t rs_c, cs_c;
|
||||
|
||||
// We begin by copying the basic fields of a.
|
||||
bli_obj_alias_to( *a, *c );
|
||||
|
||||
// Update datatype and element size fields.
|
||||
bli_obj_set_datatype( dt_targ_a, *c );
|
||||
bli_obj_set_elem_size( elem_size_c, *c );
|
||||
|
||||
// Reset the view offsets to (0,0).
|
||||
bli_obj_set_offs( 0, 0, *c );
|
||||
|
||||
// Check the mem_t entry of p associated with the cast buffer. If it is
|
||||
// NULL, then acquire memory sufficient to hold the object data and cache
|
||||
// it to p. (Otherwise, if it is non-NULL, then memory has already been
|
||||
// acquired from the memory manager and cached.) We then set the main
|
||||
// buffer of c to the cached address of the cast memory.
|
||||
bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
|
||||
|
||||
// Update the strides. We set the increments to reflect column-major order
|
||||
// storage. We start the leading dimension out as m(a) and increment it if
|
||||
// necessary so that the beginning of each column is aligned.
|
||||
cs_c = bli_align_dim_to_size( m_a, elem_size_c,
|
||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
||||
rs_c = 1;
|
||||
bli_obj_set_strides( rs_c, cs_c, *c );
|
||||
}
|
||||
*/
|
||||
|
||||
@@ -32,14 +32,12 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_unpackm_int( obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx,
|
||||
unpackm_t* cntl,
|
||||
thrinfo_t* thread );
|
||||
void bli_unpackm_int
|
||||
(
|
||||
obj_t* p,
|
||||
obj_t* a,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
/*
|
||||
void bli_unpackm_init_cast( obj_t* p,
|
||||
obj_t* a,
|
||||
obj_t* c );
|
||||
*/
|
||||
|
||||
@@ -50,10 +50,14 @@ typedef void (*FUNCPTR_T)(
|
||||
static FUNCPTR_T GENARRAY(ftypes,unpackm_unb_var1);
|
||||
|
||||
|
||||
void bli_unpackm_unb_var1( obj_t* p,
|
||||
obj_t* c,
|
||||
cntx_t* cntx,
|
||||
unpackm_t* cntl )
|
||||
void bli_unpackm_unb_var1
|
||||
(
|
||||
obj_t* p,
|
||||
obj_t* c,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
num_t dt_pc = bli_obj_datatype( *p );
|
||||
|
||||
|
||||
@@ -32,10 +32,14 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_unpackm_unb_var1( obj_t* p,
|
||||
obj_t* c,
|
||||
cntx_t* cntx,
|
||||
unpackm_t* cntl );
|
||||
void bli_unpackm_unb_var1
|
||||
(
|
||||
obj_t* p,
|
||||
obj_t* c,
|
||||
cntx_t* cntx,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
|
||||
@@ -32,9 +32,10 @@
|
||||
|
||||
*/
|
||||
|
||||
#include "bli_gemv_cntl.h"
|
||||
#include "bli_gemv_front.h"
|
||||
#include "bli_gemv_int.h"
|
||||
// NOTE: level-2 control tree code is temporarily disabled.
|
||||
//#include "bli_gemv_cntl.h"
|
||||
//#include "bli_gemv_front.h"
|
||||
//#include "bli_gemv_int.h"
|
||||
|
||||
#include "bli_gemv_var.h"
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ void PASTEMAC0(opname) \
|
||||
obj_t* beta, \
|
||||
obj_t* y, \
|
||||
cntx_t* cntx, \
|
||||
gemv_t* cntl \
|
||||
cntl_t* cntl \
|
||||
);
|
||||
|
||||
GENPROT( gemv_blk_var1 )
|
||||
|
||||
@@ -45,7 +45,7 @@ void PASTEMAC0(opname) \
|
||||
obj_t* beta, \
|
||||
obj_t* y, \
|
||||
cntx_t* cntx, \
|
||||
gemv_t* cntl \
|
||||
cntl_t* cntl \
|
||||
) \
|
||||
{ \
|
||||
num_t dt = bli_obj_datatype( *a ); \
|
||||
|
||||
@@ -34,43 +34,64 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_trsm_cntx_init( cntx_t* cntx )
|
||||
{
|
||||
// Perform basic setup on the context.
|
||||
bli_cntx_obj_create( cntx );
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( ftname, opname ) \
|
||||
\
|
||||
/*static gemv_vft GENARRAY(ftypes,gemv_unb_var1);*/ \
|
||||
static GENARRAY_VFP(ftname,opname); \
|
||||
\
|
||||
void PASTEMAC0(opname) \
|
||||
( \
|
||||
obj_t* alpha, \
|
||||
obj_t* a, \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y, \
|
||||
cntx_t* cntx, \
|
||||
gemv_t* cntl \
|
||||
) \
|
||||
{ \
|
||||
num_t dt = bli_obj_datatype( *a ); \
|
||||
\
|
||||
trans_t transa = bli_obj_conjtrans_status( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t n = bli_obj_width( *a ); \
|
||||
\
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
\
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
|
||||
\
|
||||
PASTECH(ftname,_vft) f = PASTECH(opname,_vfp)[dt]; \
|
||||
\
|
||||
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||
f( \
|
||||
transa, \
|
||||
conjx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_a, rs_a, cs_a, \
|
||||
buf_x, incx, \
|
||||
buf_beta, \
|
||||
buf_y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
|
||||
// Initialize the context with the current architecture's native
|
||||
// level-3 gemm micro-kernel, and its output preferences.
|
||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMM_UKR, cntx );
|
||||
bli_gks_cntx_set_l3_nat_ukr_prefs( BLIS_GEMM_UKR, cntx );
|
||||
GENFRONT( gemv, gemv_unb_var1 )
|
||||
GENFRONT( gemv, gemv_unb_var2 )
|
||||
|
||||
// Initialize the context with the current architecture's native
|
||||
// level-3 trsm micro-kernels.
|
||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_L_UKR, cntx );
|
||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_U_UKR, cntx );
|
||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_TRSM_L_UKR, cntx );
|
||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_TRSM_U_UKR, cntx );
|
||||
|
||||
// Initialize the context with the current architecture's register
|
||||
// and cache blocksizes (and multiples), given the execution method.
|
||||
bli_gks_cntx_set_blkszs( BLIS_NAT, 6,
|
||||
BLIS_NC, BLIS_NR,
|
||||
BLIS_KC, BLIS_KR,
|
||||
BLIS_MC, BLIS_MR,
|
||||
BLIS_NR, BLIS_NR,
|
||||
BLIS_MR, BLIS_MR,
|
||||
BLIS_KR, BLIS_KR,
|
||||
cntx );
|
||||
|
||||
// Set the pack_t schemas for native execution.
|
||||
bli_cntx_set_pack_schema_ab( BLIS_PACKED_ROW_PANELS,
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
cntx );
|
||||
}
|
||||
|
||||
void bli_trsm_cntx_finalize( cntx_t* cntx )
|
||||
{
|
||||
// Free the context and all memory allocated to it.
|
||||
bli_cntx_obj_free( cntx );
|
||||
}
|
||||
GENFRONT( gemv, gemv_unf_var1 )
|
||||
GENFRONT( gemv, gemv_unf_var2 )
|
||||
|
||||
@@ -32,8 +32,9 @@
|
||||
|
||||
*/
|
||||
|
||||
#include "bli_ger_cntl.h"
|
||||
#include "bli_ger_front.h"
|
||||
#include "bli_ger_int.h"
|
||||
// NOTE: level-2 control tree code is temporarily disabled.
|
||||
//#include "bli_ger_cntl.h"
|
||||
//#include "bli_ger_front.h"
|
||||
//#include "bli_ger_int.h"
|
||||
|
||||
#include "bli_ger_var.h"
|
||||
|
||||
@@ -47,7 +47,7 @@ void PASTEMAC0(opname) \
|
||||
obj_t* y, \
|
||||
obj_t* a, \
|
||||
cntx_t* cntx, \
|
||||
ger_t* cntl \
|
||||
cntl_t* cntl \
|
||||
);
|
||||
|
||||
GENPROT( ger_blk_var1 )
|
||||
|
||||
@@ -44,7 +44,7 @@ void PASTEMAC0(opname) \
|
||||
obj_t* y, \
|
||||
obj_t* a, \
|
||||
cntx_t* cntx, \
|
||||
ger_t* cntl \
|
||||
cntl_t* cntl \
|
||||
) \
|
||||
{ \
|
||||
num_t dt = bli_obj_datatype( *a ); \
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user