/*
   Repository viewer metadata captured with this file:

   Files
   blis/test/test_blis2.c
   2013-02-11 14:37:47 -06:00

   2478 lines
   63 KiB
   C
*/

/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2013, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis2.h"
// Timer routine; only its prototype is visible in this part of the file.
double FLA_Clock(void);

// gemm control object defined elsewhere (not in the visible part of this file).
extern gemm_t *gemm_cntl;

/*
 * Prototypes for the double-precision Fortran BLAS routines that the
 * (currently disabled) BLAS comparison sections of main() call directly.
 * Every argument, scalars included, is passed by address. The parameter
 * names are informational only and follow the reference BLAS argument order.
 */

/* ---- Level 2 ---- */

// y := alpha*op(A)*x + beta*y
void dgemv_(char *trans, int *m, int *n,
            double *alpha, double *a, int *lda,
            double *x, int *incx,
            double *beta, double *y, int *incy);

// A := alpha*x*y' + A
void dger_(int *m, int *n,
           double *alpha,
           double *x, int *incx,
           double *y, int *incy,
           double *a, int *lda);

// y := alpha*A*x + beta*y, with A symmetric
void dsymv_(char *uplo, int *n,
            double *alpha, double *a, int *lda,
            double *x, int *incx,
            double *beta, double *y, int *incy);

// A := alpha*x*x' + A, with A symmetric
void dsyr_(char *uplo, int *n,
           double *alpha,
           double *x, int *incx,
           double *a, int *lda);

// A := alpha*x*y' + alpha*y*x' + A, with A symmetric
void dsyr2_(char *uplo, int *n,
            double *alpha,
            double *x, int *incx,
            double *y, int *incy,
            double *a, int *lda);

// x := op(A)*x, with A triangular
void dtrmv_(char *uplo, char *trans, char *diag, int *n,
            double *a, int *lda,
            double *x, int *incx);

// Solves op(A)*x = b with A triangular; x holds b on entry and the solution on exit
void dtrsv_(char *uplo, char *trans, char *diag, int *n,
            double *a, int *lda,
            double *x, int *incx);

/* ---- Level 3 ---- */

// C := alpha*op(A)*op(B) + beta*C
void dgemm_(char *transa, char *transb,
            int *m, int *n, int *k,
            double *alpha, double *a, int *lda,
            double *b, int *ldb,
            double *beta, double *c, int *ldc);
int main( int argc, char** argv )
{
#if 1
{
obj_t a, b, c, w;
//obj_t a1, b1, c1;
//obj_t a11, b11, c11;
obj_t a_pack, b_pack, c_pack;
obj_t c_save;
obj_t alpha, beta;
dim_t m, n, k;
//dim_t b_part;
dim_t p;
dim_t p_begin, p_end, p_inc;
int m_input, n_input, k_input;
num_t dt_a, dt_b, dt_c;
num_t dt_alpha, dt_beta;
int r, n_repeats;
blksz_t* mr;
blksz_t* nr;
blksz_t* kr;
blksz_t* mc;
blksz_t* nc;
blksz_t* kc;
blksz_t* ni;
scalm_t* scalm_cntl;
packm_t* packm_cntl_a;
packm_t* packm_cntl_b;
packm_t* trmm_packm_cntl_a;
packm_t* trmm_packm_cntl_b;
packm_t* trsm_packm_cntl_a;
packm_t* trsm_packm_cntl_b;
unpackm_t* trsm_unpackm_cntl_b;
gemm_t* gemm_cntl_bp_ke;
gemm_t* gemm_cntl_op_bp;
gemm_t* gemm_cntl_mm_op;
gemm_t* gemm_cntl_vl_mm;
herk_t* herk_cntl_bp_ke;
herk_t* herk_cntl_op_bp;
herk_t* herk_cntl_mm_op;
herk_t* herk_cntl_vl_mm;
her2k_t* her2k_cntl_bp_ke;
her2k_t* her2k_cntl_op_bp;
her2k_t* her2k_cntl_mm_op;
her2k_t* her2k_cntl_vl_mm;
trmm_t* trmm_cntl_bp_ke;
trmm_t* trmm_cntl_op_bp;
trmm_t* trmm_cntl_mm_op;
trmm_t* trmm_cntl_vl_mm;
trmm_t* trmm3_cntl_mm_op;
trsm_t* trsm_cntl_bp_ke;
trsm_t* trsm_cntl_op_bp;
trsm_t* trsm_cntl_mm_op;
trsm_t* trsm_cntl_vl_mm;
double dtime;
double dtime_save;
double gflops;
bl2_init();
n_repeats = 3;
#if 1
p_begin = 32;
p_end = 1600;
p_inc = 32;
#else
p_begin = 768;
p_end = 768;
p_inc = 1;
#endif
#if 1
m_input = -2;
//m_input = 128;
//m_input = 256;
n_input = -1;
//n_input = 1024;
//k_input = -1;
//k_input = 256;
k_input = -1;
#else
p_begin = 16;
p_end = 16;
p_inc = 1;
m_input = 15;
k_input = 13;
n_input = 15;
#endif
dt_a = BLIS_DOUBLE;
//dt_a = BLIS_DCOMPLEX;
dt_b = BLIS_DOUBLE;
dt_c = BLIS_DOUBLE;
//dt_c = BLIS_DCOMPLEX;
dt_alpha = BLIS_DOUBLE;
dt_beta = BLIS_DOUBLE;
//p = p_begin;
for ( p = p_begin; p <= p_end; p += p_inc )
{
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
else m = ( dim_t ) m_input;
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
else n = ( dim_t ) n_input;
if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
else k = ( dim_t ) k_input;
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_obj_create( dt_beta, 1, 1, 0, 0, &beta );
#if 1
bl2_obj_create( dt_a, m, k, 0, 0, &a );
bl2_obj_create( dt_b, k, n, 0, 0, &b );
bl2_obj_create( dt_c, m, n, 0, 0, &c );
bl2_obj_create( dt_c, m, n, 0, 0, &w );
bl2_obj_create( dt_c, m, n, 0, 0, &c_save );
bl2_randm( &a );
bl2_randm( &b );
bl2_randm( &c );
bl2_randm( &w );
#elif 0
bl2_obj_create( dt_a, m, k, 0, 0, &a );
bl2_obj_create( dt_b, m, k, 0, 0, &b );
bl2_obj_create( dt_c, m, m, 0, 0, &c );
bl2_obj_create( dt_c, m, m, 0, 0, &c_save );
bl2_randm( &a );
bl2_randm( &b );
bl2_randm( &c );
bl2_setm( &BLIS_ZERO, &c );
#else
bl2_obj_create( dt_a, m, m, 0, 0, &a );
bl2_obj_create( dt_b, m, n, 0, 0, &b );
bl2_obj_create( dt_c, m, n, 0, 0, &c );
bl2_obj_create( dt_c, m, n, 0, 0, &c_save );
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
bl2_obj_set_uplo( BLIS_LOWER, a );
//bl2_obj_set_diag_offset( 4, a );
bl2_randm( &a );
bl2_randm( &c );
bl2_randm( &b );
#endif
bl2_obj_init_pack( &a_pack );
bl2_obj_init_pack( &b_pack );
bl2_obj_init_pack( &c_pack );
#if 0
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
bl2_obj_set_diag( BLIS_UNIT_DIAG, a );
bl2_setm( &BLIS_ZERO, &a );
bl2_obj_set_struc( BLIS_GENERAL, a );
bl2_obj_set_uplo( BLIS_DENSE, a );
bl2_obj_set_diag( BLIS_NONUNIT_DIAG, a );
#endif
bl2_setsc( (2.0/1.0), 0.0, &alpha );
bl2_setsc( -(1.0/1.0), 0.0, &beta );
mr = bl2_blksz_obj_create( 2, 4, 2, 2 );
kr = bl2_blksz_obj_create( 1, 1, 1, 1 );
nr = bl2_blksz_obj_create( 1, 4, 1, 1 );
mc = bl2_blksz_obj_create( 128, 384, 128, 128 );
kc = bl2_blksz_obj_create( 256, 384, 256, 256 );
nc = bl2_blksz_obj_create( 512, 512, 512, 512 );
ni = bl2_blksz_obj_create( 16, 32, 16, 16 );
scalm_cntl =
bl2_scalm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1 );
packm_cntl_a =
bl2_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
mr,
kr,
TRUE, // scale?
TRUE, // densify?
FALSE, // invert diagonal?
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS );
packm_cntl_b =
bl2_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
kr,
nr,
FALSE, // scale?
FALSE, // densify?
FALSE, // invert diagonal?
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS );
gemm_cntl_bp_ke =
bl2_gemm_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL );
gemm_cntl_op_bp =
bl2_gemm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT4,
//BLIS_VARIANT1,
mc,
ni,
NULL,
packm_cntl_a,
packm_cntl_b,
NULL,
gemm_cntl_bp_ke,
NULL );
gemm_cntl_mm_op =
bl2_gemm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
kc,
NULL,
scalm_cntl,
NULL,
NULL,
NULL,
gemm_cntl_op_bp,
NULL );
gemm_cntl_vl_mm =
bl2_gemm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
nc,
NULL,
NULL,
NULL,
NULL,
NULL,
gemm_cntl_mm_op,
NULL );
herk_cntl_bp_ke =
bl2_herk_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL );
herk_cntl_op_bp =
bl2_herk_cntl_obj_create( BLIS_BLOCKED,
//BLIS_VARIANT4,
BLIS_VARIANT1,
mc,
ni,
NULL,
packm_cntl_a,
packm_cntl_b,
NULL,
herk_cntl_bp_ke,
NULL );
herk_cntl_mm_op =
bl2_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
kc,
NULL,
scalm_cntl,
NULL,
NULL,
NULL,
herk_cntl_op_bp,
NULL );
herk_cntl_vl_mm =
bl2_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
nc,
NULL,
NULL,
NULL,
NULL,
NULL,
herk_cntl_mm_op,
NULL );
her2k_cntl_bp_ke =
bl2_her2k_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL );
her2k_cntl_op_bp =
bl2_her2k_cntl_obj_create( BLIS_BLOCKED,
//BLIS_VARIANT4,
BLIS_VARIANT1,
mc,
ni,
NULL,
packm_cntl_a,
packm_cntl_b,
NULL,
her2k_cntl_bp_ke,
herk_cntl_bp_ke,
NULL );
her2k_cntl_mm_op =
bl2_her2k_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
kc,
NULL,
scalm_cntl,
NULL,
NULL,
NULL,
her2k_cntl_op_bp,
NULL,
NULL );
her2k_cntl_vl_mm =
bl2_her2k_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
nc,
NULL,
NULL,
NULL,
NULL,
NULL,
her2k_cntl_mm_op,
NULL,
NULL );
trmm_packm_cntl_a =
bl2_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
mr,
kr,
TRUE, // scale?
TRUE, // densify?
FALSE, // invert diagonal?
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS );
trmm_packm_cntl_b =
bl2_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
kr,
nr,
FALSE, // scale?
FALSE, // densify?
FALSE, // invert diagonal?
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS );
trmm_cntl_bp_ke =
bl2_trmm_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL );
trmm_cntl_op_bp =
bl2_trmm_cntl_obj_create( BLIS_BLOCKED,
//BLIS_VARIANT4,
BLIS_VARIANT1,
mc,
ni,
NULL,
trmm_packm_cntl_a,
trmm_packm_cntl_b,
NULL,
trmm_cntl_bp_ke,
gemm_cntl_bp_ke,
NULL );
trmm_cntl_mm_op =
bl2_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
kc,
NULL,
NULL, //scalm_cntl,
NULL,
NULL,
NULL,
trmm_cntl_op_bp,
NULL,
NULL );
trmm_cntl_vl_mm =
bl2_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
nc,
NULL,
NULL,
NULL,
NULL,
NULL,
trmm_cntl_mm_op,
NULL,
NULL );
trmm3_cntl_mm_op =
bl2_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
kc,
NULL,
scalm_cntl,
NULL,
NULL,
NULL,
trmm_cntl_op_bp,
NULL,
NULL );
trsm_packm_cntl_a =
bl2_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
mr, // IMPORTANT: n dim multiple must be mr to
mr, // support right and bottom-right edge cases
FALSE, // scale?
TRUE, // densify?
TRUE, // invert diagonal?
TRUE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS );
trsm_packm_cntl_b =
bl2_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
mr, // IMPORTANT: m dim multiple must be mr since
nr, // B_pack is updated (ie: serves as C) in trsm
TRUE, // scale?
FALSE, // densify?
FALSE, // invert diagonal?
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS );
trsm_unpackm_cntl_b =
bl2_unpackm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
NULL );
trsm_cntl_bp_ke =
bl2_trsm_cntl_obj_create( BLIS_UNB_OPT,
//BLIS_VARIANT2,
BLIS_VARIANT3,
NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL );
trsm_cntl_op_bp =
bl2_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT4,
//BLIS_VARIANT1,
mc,
ni,
NULL,
trsm_packm_cntl_a,
trsm_packm_cntl_b,
NULL,
trsm_cntl_bp_ke,
gemm_cntl_bp_ke,
NULL );
trsm_cntl_mm_op =
bl2_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
kc,
NULL,
NULL, //scalm_cntl,
NULL,
NULL,
NULL,
trsm_cntl_op_bp,
NULL,
NULL );
trsm_cntl_vl_mm =
bl2_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
nc,
NULL,
NULL,
NULL,
NULL,
NULL,
trsm_cntl_mm_op,
NULL,
NULL );
//bl2_printm( "a", &a, "%8.1e", "" );
//bl2_printm( "b", &b, "%8.1e", "" );
//bl2_printm( "c", &c, "%8.1e", "" );
//bl2_printm( "alpha", &alpha, "%8.1e", "" );
//bl2_printm( "beta", &beta, "%8.1e", "" );
bl2_copym( &c, &c_save );
dtime_save = 1.0e9;
for ( r = 0; r < n_repeats; ++r )
{
bl2_copym( &c_save, &c );
dtime = bl2_clock();
#if 0
//bl2_mm_clear_smem();
bl2_packm_init( &a, &a_pack, packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a, &a_pack, packm_cntl_a );
bl2_packm_init( &b, &b_pack, packm_cntl_b );
bl2_packm_int( &BLIS_ONE, &b, &b_pack, packm_cntl_b );
bl2_gemm_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ONE,
&c,
NULL );
#endif
#if 0
bl2_packm_init( &b, &b_pack, packm_cntl_b );
{
dim_t bn_alg = 32;
dim_t bn_use, j;
dim_t n_trans = bl2_obj_width_after_trans( b );
obj_t c1_fuse, b1_fuse;
bl2_packm_init( &a, &a_pack, packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a, &a_pack, packm_cntl_a );
for ( j = 0; j < n_trans; j += bn_alg )
{
bn_use = bl2_min( bn_alg, n_trans - j );
bl2_acquire_mpart_l2r( BLIS_SUBPART1, j, bn_use, &c, &c1_fuse );
bl2_acquire_mpart_l2r( BLIS_SUBPART1, j, bn_use, &b_pack, &b1_fuse );
bl2_packm_int( &BLIS_ONE, &b, &b1_fuse, packm_cntl_b );
bl2_gemm_ker_var2( &BLIS_ONE,
&a_pack,
&b1_fuse,
&BLIS_ONE,
&c1_fuse,
NULL );
}
}
#endif
#if 1
//bl2_obj_set_struc( BLIS_HERMITIAN, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_printm( "a", &a, "%4.1f", "" );
//bl2_printm( "b", &b, "%4.1f", "" );
//bl2_printm( "c", &c, "%4.1f", "" );
bl2_gemm_int( &BLIS_ONE,
//bl2_gemm_int( &alpha,
&a,
&b,
&BLIS_ONE,
&c,
//gemm_cntl_op_bp );
gemm_cntl_mm_op );
//bl2_copym( &c, &w );
//bl2_printm( "c after", &c, "%4.1f", "" );
//exit(1);
#endif
#if 0
bl2_obj_set_struc( BLIS_HERMITIAN, a );
bl2_obj_set_uplo( BLIS_LOWER, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
bl2_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
bl2_gemm_int( &BLIS_ONE,
&a,
&b,
&BLIS_ONE,
&c,
//gemm_cntl_op_bp );
gemm_cntl_mm_op );
#endif
#if 0
bl2_obj_set_struc( BLIS_HERMITIAN, c );
bl2_obj_set_uplo( BLIS_UPPER, c );
obj_t a1, c1, ar = a;
bl2_obj_toggle_conj( ar );
bl2_obj_toggle_trans( ar );
dim_t bm = bl2_min( bl2_obj_length( a ), 128 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, bm, &a, &a1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, bm, &c, &c1 );
dtime = bl2_clock();
bl2_packm_init( &a1, &a_pack, packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a1, &a_pack, packm_cntl_a );
bl2_packm_init( &ar, &b_pack, packm_cntl_b );
bl2_packm_int( &BLIS_ONE, &ar, &b_pack, packm_cntl_b );
//bl2_printm( "a", &a, "%4.1f", "" );
//bl2_printm( "c", &c, "%4.1f", "" );
bl2_herk_u_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ONE,
&c1,
NULL );
//bl2_printm( "c after", &c, "%4.1f", "" );
//exit(1);
#endif
#if 0
bl2_obj_set_struc( BLIS_HERMITIAN, c );
//bl2_obj_set_uplo( BLIS_LOWER, c );
bl2_obj_set_uplo( BLIS_UPPER, c );
bl2_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
obj_t ah;
bl2_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, a, ah );
//bl2_printm( "a", &a, "%4.1f", "" );
//bl2_printm( "c", &c, "%4.1f", "" );
bl2_herk_int( &BLIS_ONE,
&a,
&ah,
&BLIS_ONE,
&c,
herk_cntl_op_bp );
//herk_cntl_mm_op );
//herk_cntl_vl_mm );
//bl2_printm( "c after", &c, "%4.1f", "" );
//exit(1);
#endif
#if 0
bl2_obj_set_struc( BLIS_HERMITIAN, c );
//bl2_obj_set_uplo( BLIS_LOWER, c );
bl2_obj_set_uplo( BLIS_UPPER, c );
bl2_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
obj_t ah, bh;
bl2_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, a, ah );
bl2_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, b, bh );
//bl2_printm( "a", &a, "%4.1f", "" );
//bl2_printm( "b", &b, "%4.1f", "" );
//bl2_printm( "c", &c, "%4.1f", "" );
/*
bl2_her2k_int( &BLIS_ONE,
&a,
&bh,
&BLIS_ONE,
&b,
&ah,
&BLIS_ONE,
&c,
her2k_cntl_op_bp );
//her2k_cntl_mm_op );
//her2k_cntl_vl_mm );
*/
bl2_herk_int( &BLIS_ONE,
&a,
&bh,
&BLIS_ONE,
&c,
herk_cntl_op_bp );
bl2_herk_int( &BLIS_ONE,
&b,
&ah,
&BLIS_ONE,
&c,
herk_cntl_op_bp );
//bl2_printm( "c after", &c, "%4.1f", "" );
//exit(1);
#endif
#if 0
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
bl2_obj_set_uplo( BLIS_LOWER, a );
//bl2_obj_set_diag( BLIS_UNIT_DIAG, a );
bl2_packm_init( &a, &a_pack, packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a, &a_pack, packm_cntl_a );
bl2_packm_init( &c, &b_pack, packm_cntl_b );
bl2_packm_int( &BLIS_ONE, &c, &b_pack, packm_cntl_b );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "b", &c, "%4.1f", "" );
bl2_trmm_l_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c,
NULL );
bl2_printm( "b after", &c, "%4.1f", "" );
exit(1);
#endif
#if 0
bl2_printm( "b", &c, "%4.1f", "" );
bl2_printm( "a", &a, "%4.1f", "" );
obj_t a1, a2;
obj_t c1, c2;
dim_t off1 = 0;
dim_t off2 = 8;
dim_t m1 = off2;
dim_t m2 = m - m1;
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off1, m1, &a, &a1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off2, m2, &a, &a2 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off1, m1, &c, &c1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off2, m2, &c, &c2 );
bl2_packm_init( &c, &b_pack, trmm_packm_cntl_b );
bl2_packm_int( &BLIS_ONE, &c, &b_pack, trmm_packm_cntl_b );
bl2_packm_init( &a1, &a_pack, trmm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a1, &a_pack, trmm_packm_cntl_a );
bl2_trmm_u_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c1,
NULL );
bl2_packm_init( &a2, &a_pack, trmm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a2, &a_pack, trmm_packm_cntl_a );
bl2_trmm_u_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c2,
NULL );
bl2_printm( "b after", &c, "%4.1f", "" );
exit(1);
#endif
#if 0
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
bl2_obj_set_uplo( BLIS_LOWER, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
bl2_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
//bl2_printm( "a", &a, "%4.1f", "" );
//bl2_printm( "b", &c, "%4.1f", "" );
bl2_trmm_int( BLIS_LEFT,
&BLIS_ONE,
&a,
&c,
&BLIS_ZERO,
&c,
trmm_cntl_mm_op );
//trmm_cntl_vl_mm );
//bl2_printm( "b after", &c, "%4.1f", "" );
//exit(1);
#endif
#if 0
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
bl2_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "b", &b, "%4.1f", "" );
bl2_printm( "c", &c, "%4.1f", "" );
bl2_trmm_int( BLIS_LEFT,
&BLIS_ONE,
&a,
&b,
&BLIS_ONE,
&c,
trmm_cntl_mm_op );
//trmm_cntl_vl_mm );
bl2_printm( "c after", &c, "%4.1f", "" );
exit(1);
#endif
#if 0
bl2_printm( "b", &c, "%4.1f", "" );
bl2_printm( "a", &a, "%4.1f", "" );
obj_t a1, a2;
obj_t c1, c2;
dim_t off1 = 0;
dim_t off2 = 8;
dim_t m1 = off2;
dim_t m2 = m - m1;
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off1, m1, &a, &a1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off2, m2, &a, &a2 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off1, m1, &c, &c1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off2, m2, &c, &c2 );
bl2_packm_init( &c, &b_pack, trsm_packm_cntl_b );
bl2_packm_int( &BLIS_ONE, &c, &b_pack, trsm_packm_cntl_b );
bl2_packm_init( &a1, &a_pack, trsm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a1, &a_pack, trsm_packm_cntl_a );
bl2_trsm_l_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c1,
NULL );
bl2_packm_init( &a2, &a_pack, trsm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a2, &a_pack, trsm_packm_cntl_a );
bl2_trsm_l_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c2,
NULL );
bl2_printm( "b after", &c, "%4.1f", "" );
//bl2_unpackm_int( &b_pack, &d, trsm_unpackm_cntl_b );
//bl2_printm( "d (b after unpack2)", &d, "%4.1f", "" );
exit(1);
#endif
#if 0
bl2_printm( "b", &c, "%4.1f", "" );
bl2_printm( "a", &a, "%4.1f", "" );
obj_t a1, a2;
obj_t c1, c2;
dim_t off1 = 0;
dim_t off2 = 8;
dim_t m1 = off2;
dim_t m2 = m - m1;
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off1, m1, &a, &a1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off2, m2, &a, &a2 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off1, m1, &c, &c1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off2, m2, &c, &c2 );
bl2_packm_init( &c, &b_pack, trsm_packm_cntl_b );
bl2_packm_int( &BLIS_ONE, &c, &b_pack, trsm_packm_cntl_b );
bl2_packm_init( &a2, &a_pack, trsm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a2, &a_pack, trsm_packm_cntl_a );
bl2_trsm_u_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c2,
NULL );
bl2_packm_init( &a1, &a_pack, trsm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a1, &a_pack, trsm_packm_cntl_a );
bl2_trsm_u_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c1,
NULL );
bl2_printm( "b after", &c, "%4.1f", "" );
//bl2_printm( "b after", &c, "%7.4f", "" );
exit(1);
#endif
#if 0
bl2_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
/*
obj_t a1, a2;
obj_t c1, c2;
dim_t off1 = 0;
dim_t off2 = 128;
dim_t m1 = off2;
dim_t m2 = m - m1;
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off1, m1, &a, &a1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off2, m2, &a, &a2 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off1, m1, &c, &c1 );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, off2, m2, &c, &c2 );
bl2_packm_init( &c, &b_pack, trsm_packm_cntl_b );
bl2_packm_int( &BLIS_ONE, &c, &b_pack, trsm_packm_cntl_b );
bl2_packm_init( &a1, &a_pack, trsm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a1, &a_pack, trsm_packm_cntl_a );
bl2_trsm_l_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c1,
NULL );
bl2_packm_init( &a2, &a_pack, trsm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a2, &a_pack, trsm_packm_cntl_a );
bl2_trsm_l_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c2,
NULL );
//bl2_unpackm_int( &b_pack, &d, trsm_unpackm_cntl_b );
*/
/*
bl2_packm_init( &c, &b_pack, trsm_packm_cntl_b );
bl2_packm_int( &BLIS_ONE, &c, &b_pack, trsm_packm_cntl_b );
bl2_packm_init( &a, &a_pack, trsm_packm_cntl_a );
bl2_packm_int( &BLIS_ONE, &a, &a_pack, trsm_packm_cntl_a );
bl2_trsm_l_ker_var2( &BLIS_ONE,
&a_pack,
&b_pack,
&BLIS_ZERO,
&c,
NULL );
bl2_unpackm_int( &b_pack, &c, trsm_unpackm_cntl_b );
*/
//bl2_printm( "d (b after unpack2)", &d, "%4.1f", "" );
//exit(1);
#endif
#if 0
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
bl2_trsm_int( BLIS_LEFT,
&BLIS_ONE,
&a,
&c,
&BLIS_ZERO,
&c,
trsm_cntl_op_bp );
#endif
#if 0
//bl2_obj_set_struc( BLIS_TRIANGULAR, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "b", &c, "%4.1f", "" );
bl2_trsm_int( BLIS_LEFT,
&BLIS_ONE,
&a,
&c,
&BLIS_ZERO,
&c,
trsm_cntl_mm_op );
//trmm_cntl_vl_mm );
bl2_printm( "c after", &c, "%4.1f", "" );
exit(1);
#endif
#if 0
//bl2_obj_set_struc( BLIS_TRIANGULAR, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
bl2_trsm_int( BLIS_LEFT,
&BLIS_ONE,
&a,
&c,
&BLIS_ZERO,
&c,
//trsm_cntl_op_bp );
trsm_cntl_mm_op );
//trmm_cntl_vl_mm );
#endif
#if 0
char transa = 'N';
char transb = 'N';
int mm = bl2_obj_length( c );
int kk = bl2_obj_width_after_trans( a );
int nn = bl2_obj_width( c );
int lda = bl2_obj_col_stride( a );
int ldb = bl2_obj_col_stride( b );
int ldc = bl2_obj_col_stride( c );
dgemm_( &transa,
&transb,
&mm, &nn, &kk,
alpha.buffer,
a.buffer, &lda,
b.buffer, &ldb,
beta.buffer,
c.buffer, &ldc );
#endif
#if 0
bl2_gemv( &alpha, &a, &b1, &beta, &c1 );
//bl2_gemv_unf_var2( &alpha, &a, &b1, &beta, &c1, NULL );
//bl2_gemv( &alpha, &a11, &b1, &beta, &c11 );
//bl2_gemv_unf_var1( &alpha, &a, &b1, &beta, &c1, NULL );
//bl2_gemv_unb_var2( &alpha, &a11, &b1, &beta, &c1, NULL );
//bl2_gemv_unf_var2( &alpha, &a11, &b11, &beta, &c11, NULL );
//bl2_gemv( &alpha, &a, &b, &beta, &c );
//bl2_gemv_unb_var1( &alpha, &a, &b, &beta, &c, NULL );
//bl2_gemv_unf_var1( &alpha, &a, &b, &beta, &c, NULL );
//bl2_gemv_unb_var2( &alpha, &a, &b, &beta, &c, NULL );
//bl2_gemv_unf_var2( &alpha, &a, &b, &beta, &c, NULL );
//#else
char trans = 'N';
//char trans = 'T';
//int mm = bl2_obj_length( a11 );
//int kk = bl2_obj_width( a11 );
//int lda = bl2_obj_col_stride( a11 );
int mm = bl2_obj_length( a );
int kk = bl2_obj_width( a );
int lda = bl2_obj_col_stride( a );
//int one = 1;
int incb = bl2_obj_vector_inc( b1 );
int incc = bl2_obj_vector_inc( c1 );
dgemv_( &trans,
&mm, &kk,
alpha.buffer,
a.buffer, &lda,
b.buffer, &incb,
beta.buffer,
c.buffer, &incc );
#endif
#if 0
//bl2_ger_unb_var1( &alpha, &a, &b, &c, NULL );
//bl2_ger_unb_var2( &alpha, &a, &b, &c, NULL );
//#else
int one = 1; int mm = m; int nn = n;
dger_( &mm, &nn,
alpha.buffer,
a.buffer, &one,
b.buffer, &one,
c.buffer, &mm );
#endif
#if 0
bl2_obj_set_uplo( BLIS_LOWER, a );
//bl2_hemv_unb_var1( &alpha, &a, &b, &beta, &c, NULL );
//bl2_hemv_unb_var3( &alpha, &a, &b, &beta, &c, NULL );
//bl2_hemv_unf_var1a( &alpha, &a, &b, &beta, &c, NULL );
//bl2_hemv_unf_var3a( &alpha, &a, &b, &beta, &c, NULL );
bl2_hemv_unf_var1( &alpha, &a, &b, &beta, &c, NULL );
//bl2_hemv_unf_var3( &alpha, &a, &b, &beta, &c, NULL );
//#else
int one = 1; int mm = m; char uplo = 'L';
dsymv_( &uplo,
&mm,
alpha.buffer,
a.buffer, &mm,
b.buffer, &one,
beta.buffer,
c.buffer, &one );
#endif
#if 0
bl2_obj_set_uplo( BLIS_LOWER, c );
bl2_her_unb_var2( &alpha, &a, &c, NULL );
//#else
int one = 1; int mm = m; char uplo = 'L';
dsyr_( &uplo,
&mm,
alpha.buffer,
a.buffer, &one,
c.buffer, &mm );
#endif
#if 0
bl2_obj_set_uplo( BLIS_LOWER, c );
//bl2_her2( &alpha, &a, &b, &c );
//bl2_her2_unb_var1( &alpha, &a, &b, &c, NULL );
//bl2_her2_unb_var4( &alpha, &a, &b, &c, NULL );
//bl2_her2_unf_var1( &alpha, &a, &b, &c, NULL );
//bl2_her2_unf_var4( &alpha, &a, &b, &c, NULL );
//#else
int one = 1; int mm = bl2_obj_col_stride( c ); char uplo = 'L';
dsyr2_( &uplo,
&mm,
alpha.buffer,
a.buffer, &one,
b.buffer, &one,
c.buffer, &mm );
#endif
#if 0
bl2_setsc( (1.0/1.0), &alpha );
bl2_obj_set_uplo( BLIS_LOWER, c );
bl2_obj_set_conjtrans( BLIS_NO_TRANSPOSE, c );
//bl2_trmv( &alpha, &c, &a );
//bl2_trmv_unb_var1( &alpha, &c, &a, NULL );
//bl2_trmv_unb_var2( &alpha, &c, &a, NULL );
bl2_trmv_unf_var1( &alpha, &c, &a, NULL );
//bl2_trmv_unf_var2( &alpha, &c, &a, NULL );
//#else
int one = 1; int mm = m; char uplo = 'U'; char trans = 'T'; char diag = 'N';
dtrmv_( &uplo,
&trans,
&diag,
&mm,
c.buffer, &mm,
a.buffer, &one );
#endif
#if 0
bl2_setsc( (1.0/1.0), &alpha );
bl2_obj_set_uplo( BLIS_UPPER, c );
bl2_obj_set_conjtrans( BLIS_NO_TRANSPOSE, c );
//bl2_trsv( &alpha, &c, &a );
//bl2_trsv_unb_var1( &alpha, &c, &a, NULL );
//bl2_trsv_unb_var2( &alpha, &c, &a, NULL );
bl2_trsv_unf_var1( &alpha, &c, &a, NULL );
//bl2_trsv_unf_var2( &alpha, &c, &a, NULL );
//#else
int one = 1; int mm = m; char uplo = 'L'; char trans = 'T'; char diag = 'N';
dtrsv_( &uplo,
&trans,
&diag,
&mm,
c.buffer, &mm,
a.buffer, &one );
#endif
//obj_t c_part;
//dim_t ij = 3;
//dim_t bb = 2;
//bl2_acquire_mpart_tl2br( BLIS_SUBPART22,
// ij,
// bb,
// &c,
// &c_part );
//bl2_printm( "c_part", &c_part, "%8.1e", "" );
dtime = bl2_clock() - dtime;
dtime_save = bl2_min( dtime, dtime_save );
}
//bl2_printm( "c after", &c, "%8.1e", "" );
gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );
//gflops = ( 2.0 * a_pack.m * k * n - a_pack.m * a_pack.m * k ) / ( dtime_save * 1.0e9 );
if ( bl2_obj_is_hermitian( c ) || bl2_obj_is_triangular( a ) ) gflops /= 2.0;
//gflops /= 4.0;
//gflops = ( 2.0 * bl2_obj_length( a11 ) * bl2_obj_width( a11 ) ) / ( dtime_save * 1.0e9 );
//gflops = ( 2.0 * bl2_obj_length( a ) * bl2_obj_width( a ) ) / ( dtime_save * 1.0e9 );
printf( "data_blis( %2ld, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n",
(p - p_begin + 1)/p_inc + 1, m, k, n, dtime_save, gflops );
bl2_obj_release_pack( &a_pack );
bl2_obj_release_pack( &b_pack );
bl2_obj_release_pack( &c_pack );
bl2_blksz_obj_free( mr );
bl2_blksz_obj_free( nr );
bl2_blksz_obj_free( kr );
bl2_blksz_obj_free( mc );
bl2_blksz_obj_free( nc );
bl2_blksz_obj_free( kc );
bl2_blksz_obj_free( ni );
bl2_cntl_obj_free( scalm_cntl );
bl2_cntl_obj_free( packm_cntl_a );
bl2_cntl_obj_free( packm_cntl_b );
bl2_cntl_obj_free( trmm_packm_cntl_a );
bl2_cntl_obj_free( trmm_packm_cntl_b );
bl2_cntl_obj_free( trsm_packm_cntl_a );
bl2_cntl_obj_free( trsm_packm_cntl_b );
bl2_cntl_obj_free( trsm_unpackm_cntl_b );
bl2_cntl_obj_free( gemm_cntl_bp_ke );
bl2_cntl_obj_free( gemm_cntl_op_bp );
bl2_cntl_obj_free( gemm_cntl_mm_op );
bl2_cntl_obj_free( gemm_cntl_vl_mm );
bl2_cntl_obj_free( herk_cntl_bp_ke );
bl2_cntl_obj_free( herk_cntl_op_bp );
bl2_cntl_obj_free( herk_cntl_mm_op );
bl2_cntl_obj_free( herk_cntl_vl_mm );
bl2_cntl_obj_free( her2k_cntl_bp_ke );
bl2_cntl_obj_free( her2k_cntl_op_bp );
bl2_cntl_obj_free( her2k_cntl_mm_op );
bl2_cntl_obj_free( her2k_cntl_vl_mm );
bl2_cntl_obj_free( trmm_cntl_bp_ke );
bl2_cntl_obj_free( trmm_cntl_op_bp );
bl2_cntl_obj_free( trmm_cntl_mm_op );
bl2_cntl_obj_free( trmm_cntl_vl_mm );
bl2_cntl_obj_free( trmm3_cntl_mm_op );
bl2_cntl_obj_free( trsm_cntl_bp_ke );
bl2_cntl_obj_free( trsm_cntl_op_bp );
bl2_cntl_obj_free( trsm_cntl_mm_op );
bl2_cntl_obj_free( trsm_cntl_vl_mm );
bl2_obj_free( &alpha );
bl2_obj_free( &beta );
bl2_obj_free( &a );
bl2_obj_free( &b );
bl2_obj_free( &c );
bl2_obj_free( &w );
bl2_obj_free( &c_save );
}
bl2_finalize();
return 0;
}
#endif
// -- Level 1d -----------------------------------------------------------------
#if 0
{
num_t dt_alpha, dt_a, dt_b;
obj_t alpha, a, b;
dim_t m, n;
dt_alpha = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_b = BLIS_DOUBLE;
m = 6;
n = 11;
bl2_init();
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_obj_create( dt_a, n, m, 0, 0, &a );
bl2_obj_create( dt_b, m, n, 0, 0, &b );
bl2_setsc( -(3.0/1.0), &alpha );
bl2_setm( &BLIS_TWO, &a );
bl2_setm( &BLIS_ONE, &b );
bl2_printm( "alpha", &alpha, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
bl2_printm( "b before", &b, "%4.1f", "" );
//bl2_obj_set_struc( BLIS_SYMMETRIC, a );
bl2_obj_set_struc( BLIS_GENERAL, a );
bl2_obj_set_uplo( BLIS_DENSE, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_diag_offset( -7, a );
bl2_obj_set_diag( BLIS_UNIT_DIAG, a );
bl2_obj_set_trans( BLIS_TRANSPOSE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, a );
bl2_axpyd( &alpha, &a, &b );
bl2_printm( "b after", &b, "%4.1f", "" );
bl2_obj_free( &alpha );
bl2_obj_free( &a );
bl2_obj_free( &b );
bl2_finalize();
return 0;
}
#endif
#if 0
{
num_t dt_a, dt_b;
obj_t a, b;
dim_t m, n;
dt_a = BLIS_DOUBLE;
dt_b = BLIS_DOUBLE;
m = 6;
n = 11;
bl2_init();
bl2_obj_create( dt_a, n, m, 0, 0, &a );
bl2_obj_create( dt_b, m, n, 0, 0, &b );
bl2_setm( &BLIS_TWO, &a );
bl2_setm( &BLIS_ZERO, &b );
bl2_printm( "a before", &a, "%4.1f", "" );
bl2_printm( "b before", &b, "%4.1f", "" );
//bl2_obj_set_struc( BLIS_SYMMETRIC, a );
bl2_obj_set_struc( BLIS_GENERAL, a );
bl2_obj_set_uplo( BLIS_DENSE, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_diag_offset( -7, a );
bl2_obj_set_diag( BLIS_NONUNIT_DIAG, a );
bl2_obj_set_trans( BLIS_TRANSPOSE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, a );
bl2_copyd( &a, &b );
bl2_printm( "b after", &b, "%4.1f", "" );
bl2_obj_free( &a );
bl2_obj_free( &b );
bl2_finalize();
return 0;
}
#endif
#if 0
{
num_t dt_beta, dt_a, dt_b;
obj_t beta, a, b;
dim_t m, n;
dt_beta = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_b = BLIS_DOUBLE;
m = 6;
n = 11;
bl2_init();
bl2_obj_create( dt_beta, 1, 1, 0, 0, &beta );
bl2_obj_create( dt_a, n, m, 0, 0, &a );
bl2_obj_create( dt_b, m, n, 0, 0, &b );
bl2_setsc( -(3.0/1.0), &beta );
bl2_setm( &BLIS_MINUS_TWO, &a );
bl2_setm( &BLIS_ONE, &b );
bl2_printm( "beta", &beta, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
bl2_printm( "b before", &b, "%4.1f", "" );
//bl2_obj_set_struc( BLIS_SYMMETRIC, a );
bl2_obj_set_struc( BLIS_GENERAL, a );
bl2_obj_set_uplo( BLIS_DENSE, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_diag_offset( 2, a );
bl2_obj_set_diag( BLIS_NONUNIT_DIAG, a );
bl2_obj_set_trans( BLIS_TRANSPOSE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, a );
bl2_scal2d( &beta, &a, &b );
bl2_printm( "b after", &b, "%4.1f", "" );
bl2_obj_free( &beta );
bl2_obj_free( &a );
bl2_obj_free( &b );
bl2_finalize();
return 0;
}
#endif
#if 0
{
num_t dt_a, dt_alpha;
obj_t alpha, a;
dim_t m, n;
dt_alpha = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
m = 6;
n = 11;
bl2_init();
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_obj_create( dt_a, m, n, 0, 0, &a );
bl2_setsc( -(4.0/1.0), &alpha );
bl2_setm( &BLIS_TWO, &a );
bl2_printm( "alpha", &alpha, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
//bl2_obj_set_struc( BLIS_GENERAL, a );
//bl2_obj_set_uplo( BLIS_DENSE, a );
//bl2_obj_set_struc( BLIS_SYMMETRIC, a );
bl2_obj_set_struc( BLIS_GENERAL, a );
bl2_obj_set_uplo( BLIS_DENSE, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_diag_offset( -2, a );
bl2_obj_set_diag( BLIS_UNIT_DIAG, a );
bl2_scald( &alpha, &a );
bl2_printm( "a after", &a, "%4.1f", "" );
bl2_obj_free( &alpha );
bl2_obj_free( &a );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_setd().
// Creates a 1x1 scalar alpha (set to -4.0) and an m x n (6 x 11) double
// matrix a (filled via BLIS_TWO), prints both, tags a as general/dense with
// diagonal offset 7 and BLIS_NONUNIT_DIAG, calls bl2_setd( alpha, a ), and
// prints a again.
// When enabled, the trailing return leaves the enclosing function.
{
num_t dt_a, dt_alpha;
obj_t alpha, a;
dim_t m, n;
dt_alpha = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
m = 6;
n = 11;
bl2_init();
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_obj_create( dt_a, m, n, 0, 0, &a );
bl2_setsc( -(4.0/1.0), &alpha );
bl2_setm( &BLIS_TWO, &a );
bl2_printm( "alpha", &alpha, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
// Alternative structure/uplo settings are kept commented out for toggling.
//bl2_obj_set_struc( BLIS_GENERAL, a );
//bl2_obj_set_uplo( BLIS_DENSE, a );
//bl2_obj_set_struc( BLIS_SYMMETRIC, a );
bl2_obj_set_struc( BLIS_GENERAL, a );
bl2_obj_set_uplo( BLIS_DENSE, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_diag_offset( 7, a );
bl2_obj_set_diag( BLIS_NONUNIT_DIAG, a );
// Operation under test.
bl2_setd( &alpha, &a );
bl2_printm( "a after", &a, "%4.1f", "" );
bl2_obj_free( &alpha );
bl2_obj_free( &a );
bl2_finalize();
return 0;
}
#endif
// -- Level 1m -----------------------------------------------------------------
#if 0
// Disabled scratch driver exercising bl2_axpym().
// Creates a 1x1 scalar alpha (set to -3.0), an n x m (11 x 6) matrix a
// (filled via BLIS_TWO) and an m x n (6 x 11) matrix b (filled via
// BLIS_ONE). a is created with swapped dimensions and then flagged
// BLIS_TRANSPOSE, presumably so that it conforms to b -- confirm.
// a is additionally tagged symmetric/upper with diagonal offset 2 and
// BLIS_UNIT_DIAG before bl2_axpym( alpha, a, b ) is called.
// When enabled, the trailing return leaves the enclosing function.
{
num_t dt_alpha, dt_a, dt_b;
obj_t alpha, a, b;
dim_t m, n;
dt_alpha = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_b = BLIS_DOUBLE;
m = 6;
n = 11;
bl2_init();
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_obj_create( dt_a, n, m, 0, 0, &a );
bl2_obj_create( dt_b, m, n, 0, 0, &b );
bl2_setsc( -(3.0/1.0), &alpha );
bl2_setm( &BLIS_TWO, &a );
bl2_setm( &BLIS_ONE, &b );
bl2_printm( "alpha", &alpha, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
bl2_printm( "b before", &b, "%4.1f", "" );
// Alternative structure/uplo settings are kept commented out for toggling.
bl2_obj_set_struc( BLIS_SYMMETRIC, a );
//bl2_obj_set_struc( BLIS_GENERAL, a );
//bl2_obj_set_uplo( BLIS_DENSE, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_diag_offset( 2, a );
bl2_obj_set_diag( BLIS_UNIT_DIAG, a );
bl2_obj_set_trans( BLIS_TRANSPOSE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, a );
// Operation under test; only b is printed afterwards.
bl2_axpym( &alpha, &a, &b );
bl2_printm( "b after", &b, "%4.1f", "" );
bl2_obj_free( &alpha );
bl2_obj_free( &a );
bl2_obj_free( &b );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_copym().
// Creates an n x m (6 x 11) matrix a filled by bl2_randm() and an m x n
// (11 x 6) matrix b filled via BLIS_ZERO. a is tagged triangular/lower with
// diagonal offset 2 and flagged BLIS_TRANSPOSE, then bl2_copym( a, b ) is
// called and b is printed.
// When enabled, the trailing return leaves the enclosing function.
{
num_t dt_a, dt_b;
obj_t a, b;
dim_t m, n;
dt_a = BLIS_DOUBLE;
dt_b = BLIS_DOUBLE;
m = 11;
n = 6;
bl2_init();
bl2_obj_create( dt_a, n, m, 0, 0, &a );
bl2_obj_create( dt_b, m, n, 0, 0, &b );
//bl2_setm( &BLIS_MINUS_ONE, &a );
bl2_randm( &a );
bl2_setm( &BLIS_ZERO, &b );
bl2_printm( "a before", &a, "%4.1f", "" );
bl2_printm( "b before", &b, "%4.1f", "" );
// Alternative structure/uplo settings are kept commented out for toggling.
//bl2_obj_set_struc( BLIS_GENERAL, a );
//bl2_obj_set_uplo( BLIS_DENSE, a );
bl2_obj_set_diag_offset( 2, a );
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_trans( BLIS_TRANSPOSE, a );
// Operation under test.
bl2_copym( &a, &b );
bl2_printm( "b after", &b, "%4.1f", "" );
bl2_obj_free( &a );
bl2_obj_free( &b );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_scalm().
// Creates an m x n (6 x 1) matrix a filled via BLIS_ONE and a 1x1 scalar
// alpha set to -4.0. a is tagged triangular/upper with diagonal offset -7,
// then bl2_scalm( alpha, a ) is called and a is printed.
// NOTE(review): offset -7 on a 6 x 1 object looks like a deliberate
// out-of-range edge case -- confirm intent.
// When enabled, the trailing return leaves the enclosing function.
{
num_t dt_a, dt_alpha;
obj_t alpha, a;
dim_t m, n;
dt_alpha = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
m = 6;
n = 1;
bl2_init();
bl2_obj_create( dt_a, m, n, 0, 0, &a );
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_setm( &BLIS_ONE, &a );
bl2_setsc( -(4.0/1.0), &alpha );
bl2_printm( "alpha", &alpha, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
// Alternative structure/uplo settings are kept commented out for toggling.
//bl2_obj_set_struc( BLIS_GENERAL, a );
//bl2_obj_set_uplo( BLIS_DENSE, a );
bl2_obj_set_diag_offset( -7, a );
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
// Operation under test.
bl2_scalm( &alpha, &a );
bl2_printm( "a after", &a, "%4.1f", "" );
bl2_obj_free( &a );
bl2_obj_free( &alpha );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_scal2m().
// Creates a 1x1 scalar beta (set to -3.0), an n x m (11 x 6) matrix a
// (filled via BLIS_TWO) and an m x n (6 x 11) matrix b (filled via
// BLIS_ONE). a is tagged symmetric/upper with diagonal offset -2,
// BLIS_UNIT_DIAG and BLIS_TRANSPOSE before bl2_scal2m( beta, a, b ) is
// called; b is printed afterwards.
// When enabled, the trailing return leaves the enclosing function.
{
num_t dt_beta, dt_a, dt_b;
obj_t beta, a, b;
dim_t m, n;
dt_beta = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_b = BLIS_DOUBLE;
m = 6;
n = 11;
bl2_init();
bl2_obj_create( dt_beta, 1, 1, 0, 0, &beta );
bl2_obj_create( dt_a, n, m, 0, 0, &a );
bl2_obj_create( dt_b, m, n, 0, 0, &b );
bl2_setsc( -(3.0/1.0), &beta );
bl2_setm( &BLIS_TWO, &a );
bl2_setm( &BLIS_ONE, &b );
bl2_printm( "beta", &beta, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
bl2_printm( "b before", &b, "%4.1f", "" );
// Alternative structure/uplo settings are kept commented out for toggling.
bl2_obj_set_struc( BLIS_SYMMETRIC, a );
//bl2_obj_set_struc( BLIS_GENERAL, a );
//bl2_obj_set_uplo( BLIS_DENSE, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_diag_offset( -2, a );
bl2_obj_set_diag( BLIS_UNIT_DIAG, a );
bl2_obj_set_trans( BLIS_TRANSPOSE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, a );
// Operation under test.
bl2_scal2m( &beta, &a, &b );
bl2_printm( "b after", &b, "%4.1f", "" );
bl2_obj_free( &beta );
bl2_obj_free( &a );
bl2_obj_free( &b );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_packm_int() with an unblocked
// packm control object.
// Creates a 1x1 scalar beta (set to -1.0) and an n x m (11 x 6) random
// matrix a tagged triangular/upper, diagonal offset -2, BLIS_NONUNIT_DIAG,
// BLIS_TRANSPOSE. Two blocksize objects and a packm control object
// (BLIS_UNBLOCKED, BLIS_VARIANT1, scale = FALSE, densify = TRUE,
// BLIS_PACKED_ROWS) are created, a is packed into p, and p is printed twice:
// once as returned and once with its dimensions replaced by those of its
// pack_mem.
// When enabled, the trailing return leaves the enclosing function.
{
num_t dt_beta, dt_a;
obj_t beta, a, p;
dim_t m, n;
blksz_t* mult_m;
blksz_t* mult_n;
packm_t* cntl;
dt_beta = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
m = 6;
n = 11;
bl2_init();
bl2_obj_create( dt_beta, 1, 1, 0, 0, &beta );
bl2_obj_create( dt_a, n, m, 0, 0, &a );
bl2_setsc( -(1.0/1.0), &beta );
//bl2_setm( &BLIS_TWO, &a );
bl2_randm( &a );
bl2_printm( "beta", &beta, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
// Alternative structure/uplo settings are kept commented out for toggling.
//bl2_obj_set_struc( BLIS_SYMMETRIC, a );
bl2_obj_set_struc( BLIS_TRIANGULAR, a );
//bl2_obj_set_struc( BLIS_GENERAL, a );
//bl2_obj_set_uplo( BLIS_DENSE, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_diag_offset( -2, a );
bl2_obj_set_diag( BLIS_NONUNIT_DIAG, a );
bl2_obj_set_trans( BLIS_TRANSPOSE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, a );
// Blocksize objects passed to the control object as the m and n multiples.
// NOTE(review): the four arguments are presumably one value per datatype --
// confirm against bl2_blksz_obj_create().
mult_m = bl2_blksz_obj_create( 2, 4, 2, 2 );
mult_n = bl2_blksz_obj_create( 1, 2, 1, 1 );
cntl = bl2_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
mult_m,
mult_n,
//TRUE, // scale?
FALSE, // scale?
//FALSE, // densify?
TRUE, // densify?
BLIS_PACKED_ROWS );
//BLIS_PACKED_COLUMNS );
bl2_obj_init_pack( &p );
// Operation under test.
bl2_packm_int( &beta, &a, &p, cntl );
bl2_printm( "p after", &p, "%4.1f", "" );
// Overwrite p's dimensions with those of its pack_mem so the second print
// shows the full packed buffer rather than just the logical matrix.
p.m = p.pack_mem->m;
p.n = p.pack_mem->n;
bl2_printm( "p mem", &p, "%4.1f", "" );
bl2_obj_release_pack( &p );
bl2_obj_free( &beta );
bl2_obj_free( &a );
bl2_blksz_obj_free( mult_m );
bl2_blksz_obj_free( mult_n );
bl2_cntl_obj_free( cntl );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_packm_int() with a blocked packm
// control object.
// Creates a 1x1 scalar beta (set to -1.0) and an m x n (7 x 11) random
// matrix a tagged general/dense with diagonal offset 0. Two blocksize
// objects and a packm control object (BLIS_BLOCKED, BLIS_VARIANT1,
// scale = FALSE, densify = TRUE, BLIS_PACKED_COL_PANELS) are created, a is
// packed into p, and p is printed after its dimensions and strides are
// overwritten from its pack_mem.
// When enabled, the trailing return leaves the enclosing function.
{
num_t dt_beta, dt_a;
obj_t beta, a, p;
dim_t m, n;
blksz_t* mult_m;
blksz_t* mult_n;
packm_t* cntl;
dt_beta = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
m = 7;
n = 11;
bl2_init();
bl2_obj_create( dt_beta, 1, 1, 0, 0, &beta );
bl2_obj_create( dt_a, m, n, 0, 0, &a );
bl2_setsc( -(1.0/1.0), &beta );
//bl2_setm( &BLIS_TWO, &a );
bl2_randm( &a );
bl2_printm( "beta", &beta, "%4.1f", "" );
bl2_printm( "a before", &a, "%4.1f", "" );
// Alternative structure/uplo/diag settings are kept commented out for
// toggling.
//bl2_obj_set_struc( BLIS_SYMMETRIC, a );
//bl2_obj_set_struc( BLIS_TRIANGULAR, a );
bl2_obj_set_struc( BLIS_GENERAL, a );
bl2_obj_set_uplo( BLIS_DENSE, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
//bl2_obj_set_diag_offset( -2, a );
bl2_obj_set_diag_offset( 0, a );
//bl2_obj_set_diag( BLIS_UNIT_DIAG, a );
//bl2_obj_set_trans( BLIS_TRANSPOSE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, a );
// Blocksize objects passed to the control object as the m and n multiples.
mult_m = bl2_blksz_obj_create( 2, 4, 2, 2 );
mult_n = bl2_blksz_obj_create( 1, 2, 1, 1 );
cntl = bl2_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
mult_m,
mult_n,
//TRUE, // scale?
FALSE, // scale?
//FALSE, // densify?
TRUE, // densify?
BLIS_PACKED_COL_PANELS );
bl2_obj_init_pack( &p );
// Operation under test.
bl2_packm_int( &beta, &a, &p, cntl );
// Overwrite p's dimensions and strides from its pack_mem before printing.
// NOTE(review): rs = 1 and cs = pack_mem->m presumably make the print treat
// the packed buffer as one column-stored matrix -- confirm.
p.m = p.pack_mem->m;
p.n = p.pack_mem->n;
p.rs = 1;
p.cs = p.pack_mem->m;
bl2_printm( "p mem", &p, "%4.1f", "" );
bl2_obj_release_pack( &p );
bl2_obj_free( &beta );
bl2_obj_free( &a );
bl2_blksz_obj_free( mult_m );
bl2_blksz_obj_free( mult_n );
bl2_cntl_obj_free( cntl );
bl2_finalize();
return 0;
}
#endif
// -- Level 2 ------------------------------------------------------------------
#if 0
// Disabled scratch driver exercising bl2_gemv().
// Creates an m x n (11 x 13) matrix a, a K x n (4 x 13) matrix xK and a
// K x m (4 x 11) matrix yK. The operands x and y are obtained from xK and yK
// with bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, ... ); presumably each is
// a single-row view used as a strided vector -- confirm. All operands are
// randomized, alpha = 2.0 and beta = -1.0, then
// bl2_gemv( alpha, a, x, beta, y ) is called and y is printed.
// The commented-out BLIS_DCOMPLEX assignments and *_var* calls are
// alternative datatypes / implementations kept for toggling.
// When enabled, the trailing return leaves the enclosing function.
{
obj_t alpha, beta, a, x, y, xK, yK;
num_t dt_alpha, dt_beta, dt_a, dt_x, dt_y;
dim_t m, n, K;
//dt_alpha = BLIS_DCOMPLEX;
//dt_beta = BLIS_DCOMPLEX;
//dt_a = BLIS_DCOMPLEX;
//dt_x = BLIS_DCOMPLEX;
//dt_y = BLIS_DCOMPLEX;
dt_alpha = BLIS_DOUBLE;
dt_beta = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_x = BLIS_DOUBLE;
dt_y = BLIS_DOUBLE;
m = 11;
n = 13;
K = 4;
bl2_init();
//bl2_obj_create( dt_a, m, n, n, 1, &a );
//bl2_obj_create( dt_a, m, n, 0, 0, &a );
bl2_obj_create( dt_a, m, n, 0, 0, &a );
bl2_obj_create( dt_x, K, n, 0, 0, &xK );
bl2_obj_create( dt_y, K, m, 0, 0, &yK );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &xK, &x );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &yK, &y );
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_obj_create( dt_beta, 1, 1, 0, 0, &beta );
// Operands are zeroed first and then overwritten with random values.
bl2_setm( &BLIS_ZERO, &a );
bl2_setm( &BLIS_ZERO, &x );
bl2_setm( &BLIS_ZERO, &y );
bl2_randm( &a );
bl2_randm( &x );
bl2_randm( &y );
bl2_setsc( (2.0/1.0), &alpha );
bl2_setsc( -(1.0/1.0), &beta );
//bl2_setsc( (1.0/1.0), &alpha );
//bl2_setsc( (0.0/1.0), &beta );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "x", &x, "%4.1f", "" );
bl2_printm( "y", &y, "%4.1f", "" );
//bl2_obj_set_conjtrans( BLIS_CONJ_NO_TRANSPOSE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, x );
// Operation under test.
bl2_gemv( &alpha, &a, &x, &beta, &y );
//bl2_gemv_unb_var1( &alpha, &a, &x, &beta, &y, NULL );
//bl2_gemv_unf_var1( &alpha, &a, &x, &beta, &y, NULL );
//bl2_gemv_unb_var2( &alpha, &a, &x, &beta, &y, NULL );
//bl2_gemv_unf_var2( &alpha, &a, &x, &beta, &y, NULL );
bl2_printm( "y after", &y, "%4.1f", "" );
// Only the parent objects xK and yK are freed; x and y are not freed
// separately.
bl2_obj_free( &a );
bl2_obj_free( &xK );
bl2_obj_free( &yK );
bl2_obj_free( &alpha );
bl2_obj_free( &beta );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_ger().
// Creates an m x n (11 x 13) matrix a, a K x m (4 x 11) matrix xK and a
// K x n (4 x 13) matrix yK. x and y are obtained from xK and yK with
// bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, ... ); presumably single-row
// views used as strided vectors -- confirm. All operands are randomized,
// alpha = 2.0, both vectors are tagged BLIS_NO_CONJUGATE, then
// bl2_ger( alpha, x, y, a ) is called and a is printed.
// When enabled, the trailing return leaves the enclosing function.
{
obj_t alpha, a, x, y, xK, yK;
num_t dt_alpha, dt_a, dt_x, dt_y;
dim_t m, n, K;
//dt_alpha = BLIS_DCOMPLEX;
//dt_a = BLIS_DCOMPLEX;
//dt_x = BLIS_DCOMPLEX;
//dt_y = BLIS_DCOMPLEX;
dt_alpha = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_x = BLIS_DOUBLE;
dt_y = BLIS_DOUBLE;
m = 11;
n = 13;
K = 4;
bl2_init();
bl2_obj_create( dt_a, m, n, 0, 0, &a );
//bl2_obj_create( dt_a, m, n, n, 1, &a );
bl2_obj_create( dt_x, K, m, 0, 0, &xK );
bl2_obj_create( dt_y, K, n, 0, 0, &yK );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &xK, &x );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &yK, &y );
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
// Operands are zeroed first and then overwritten with random values.
bl2_setm( &BLIS_ZERO, &a );
bl2_setm( &BLIS_ZERO, &x );
bl2_setm( &BLIS_ZERO, &y );
bl2_randm( &a );
bl2_randm( &x );
bl2_randm( &y );
bl2_setsc( (2.0/1.0), &alpha );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "x", &x, "%4.1f", "" );
bl2_printm( "y", &y, "%4.1f", "" );
bl2_obj_set_conj( BLIS_NO_CONJUGATE, x );
bl2_obj_set_conj( BLIS_NO_CONJUGATE, y );
// Operation under test; the *_var* calls are alternative implementations.
bl2_ger( &alpha, &x, &y, &a );
//bl2_ger_unb_var1( &alpha, &x, &y, &a, NULL );
//bl2_ger_unb_var2( &alpha, &x, &y, &a, NULL );
bl2_printm( "a after", &a, "%4.1f", "" );
// Only the parent objects xK and yK are freed; x and y are not freed
// separately.
bl2_obj_free( &a );
bl2_obj_free( &xK );
bl2_obj_free( &yK );
bl2_obj_free( &alpha );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_hemv().
// Creates an m x m (11 x 11) matrix a and two K x m (4 x 11) matrices xK and
// yK. x and y are obtained from xK and yK with
// bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, ... ); presumably single-row
// views used as strided vectors -- confirm. All operands are randomized,
// alpha = 2.0 and beta = -1.0, a is tagged BLIS_UPPER / BLIS_NO_CONJUGATE,
// then bl2_hemv( alpha, a, x, beta, y ) is called and y is printed.
// When enabled, the trailing return leaves the enclosing function.
{
obj_t alpha, beta, a, x, y, xK, yK;
num_t dt_alpha, dt_beta, dt_a, dt_x, dt_y;
dim_t m, K;
//dt_alpha = BLIS_DCOMPLEX;
//dt_beta = BLIS_DCOMPLEX;
//dt_a = BLIS_DCOMPLEX;
//dt_x = BLIS_DCOMPLEX;
//dt_y = BLIS_DCOMPLEX;
dt_alpha = BLIS_DOUBLE;
dt_beta = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_x = BLIS_DOUBLE;
dt_y = BLIS_DOUBLE;
m = 11;
K = 4;
bl2_init();
bl2_obj_create( dt_a, m, m, 0, 0, &a );
//bl2_obj_create( dt_a, m, m, m, 1, &a );
bl2_obj_create( dt_x, K, m, 0, 0, &xK );
bl2_obj_create( dt_y, K, m, 0, 0, &yK );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &xK, &x );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &yK, &y );
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_obj_create( dt_beta, 1, 1, 0, 0, &beta );
// Operands are zeroed first and then overwritten with random values.
bl2_setm( &BLIS_ZERO, &a );
bl2_setm( &BLIS_ZERO, &x );
bl2_setm( &BLIS_ZERO, &y );
bl2_randm( &a );
bl2_randm( &x );
bl2_randm( &y );
bl2_setsc( (2.0/1.0), &alpha );
bl2_setsc( -(1.0/1.0), &beta );
//bl2_setsc( (1.0/1.0), &alpha );
//bl2_setsc( (1.0/1.0), &beta );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "x", &x, "%4.1f", "" );
bl2_printm( "y", &y, "%4.1f", "" );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
bl2_obj_set_conj( BLIS_NO_CONJUGATE, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, x );
// Operation under test; the *_var* calls are alternative implementations.
bl2_hemv( &alpha, &a, &x, &beta, &y );
//bl2_hemv_unb_var1( &alpha, &a, &x, &beta, &y, NULL );
//bl2_hemv_unb_var2( &alpha, &a, &x, &beta, &y, NULL );
//bl2_hemv_unb_var3( &alpha, &a, &x, &beta, &y, NULL );
//bl2_hemv_unb_var4( &alpha, &a, &x, &beta, &y, NULL );
//bl2_hemv_unf_var1a( &alpha, &a, &x, &beta, &y, NULL );
//bl2_hemv_unf_var1( &alpha, &a, &x, &beta, &y, NULL );
//bl2_hemv_unf_var3a( &alpha, &a, &x, &beta, &y, NULL );
//bl2_hemv_unf_var3( &alpha, &a, &x, &beta, &y, NULL );
bl2_printm( "y after", &y, "%4.1f", "" );
// Only the parent objects xK and yK are freed; x and y are not freed
// separately.
bl2_obj_free( &a );
bl2_obj_free( &xK );
bl2_obj_free( &yK );
bl2_obj_free( &alpha );
bl2_obj_free( &beta );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_her().
// Creates a K x m (4 x 13) matrix xK and an m x m (13 x 13) matrix a; a is
// created with explicit strides ( m, 1 ) rather than the ( 0, 0 ) used by
// the commented-out alternative. x is obtained from xK with
// bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, ... ); presumably a single-row
// view used as a strided vector -- confirm. Operands are randomized,
// alpha = 2.0, a is tagged BLIS_UPPER, then bl2_her( alpha, x, a ) is called
// and a is printed.
// When enabled, the trailing return leaves the enclosing function.
{
obj_t alpha, a, x, xK;
num_t dt_alpha, dt_a, dt_x;
dim_t m, K;
//dt_x = BLIS_DCOMPLEX;
//dt_a = BLIS_DCOMPLEX;
dt_x = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
// alpha's datatype is derived from dt_x (presumably its real counterpart,
// going by the helper's name -- confirm).
dt_alpha = bl2_datatype_proj_to_real( dt_x );
m = 13;
K = 4;
bl2_init();
bl2_obj_create( dt_x, K, m, 0, 0, &xK );
//bl2_obj_create( dt_a, m, m, 0, 0, &a );
bl2_obj_create( dt_a, m, m, m, 1, &a );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &xK, &x );
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
// Operands are zeroed first and then overwritten with random values.
bl2_setm( &BLIS_ZERO, &x );
bl2_setm( &BLIS_ZERO, &a );
bl2_randm( &x );
bl2_randm( &a );
bl2_setsc( (2.0/1.0), &alpha );
//bl2_setsc( (1.0/1.0), &alpha );
bl2_printm( "x", &x, "%4.1f", "" );
bl2_printm( "a", &a, "%4.1f", "" );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, x );
// Operation under test; the *_var* calls are alternative implementations.
bl2_her( &alpha, &x, &a );
//bl2_her_unb_var1( &alpha, &x, &a, NULL );
//bl2_her_unb_var2( &alpha, &x, &a, NULL );
bl2_printm( "a after", &a, "%4.1f", "" );
// Only the parent object xK is freed; x is not freed separately.
bl2_obj_free( &xK );
bl2_obj_free( &a );
bl2_obj_free( &alpha );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_her2().
// Creates two K x m (4 x 13) matrices xK and yK and an m x m (13 x 13)
// matrix a. x and y are obtained from xK and yK with
// bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, ... ); presumably single-row
// views used as strided vectors -- confirm. Operands are randomized,
// alpha = 2.0, a is tagged BLIS_UPPER, then bl2_her2( alpha, x, y, a ) is
// called and a is printed.
// When enabled, the trailing return leaves the enclosing function.
{
obj_t alpha, a, x, y, xK, yK;
num_t dt_alpha, dt_a, dt_x, dt_y;
dim_t m, K;
//dt_x = BLIS_DCOMPLEX;
//dt_y = BLIS_DCOMPLEX;
//dt_a = BLIS_DCOMPLEX;
//dt_alpha = BLIS_DCOMPLEX;
dt_x = BLIS_DOUBLE;
dt_y = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_alpha = BLIS_DOUBLE;
m = 13;
K = 4;
bl2_init();
bl2_obj_create( dt_x, K, m, 0, 0, &xK );
bl2_obj_create( dt_y, K, m, 0, 0, &yK );
//bl2_obj_create( dt_a, m, m, m, 1, &a );
bl2_obj_create( dt_a, m, m, 0, 0, &a );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &xK, &x );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &yK, &y );
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
// Operands are zeroed first and then overwritten with random values.
bl2_setm( &BLIS_ZERO, &x );
bl2_setm( &BLIS_ZERO, &y );
bl2_setm( &BLIS_ZERO, &a );
bl2_randm( &x );
bl2_randm( &y );
bl2_randm( &a );
//bl2_randv( &alpha );
bl2_setsc( (2.0/1.0), &alpha );
//bl2_setsc( (1.0/1.0), &alpha );
bl2_printm( "x", &x, "%4.1f", "" );
bl2_printm( "y", &y, "%4.1f", "" );
bl2_printm( "a", &a, "%4.1f", "" );
//bl2_printv( "alpha", &alpha, "%4.1f", "" );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_conj( BLIS_CONJUGATE, x );
// Operation under test; the *_var* calls are alternative implementations.
bl2_her2( &alpha, &x, &y, &a );
//bl2_her2_unb_var1( &alpha, &x, &y, &a, NULL );
//bl2_her2_unb_var2( &alpha, &x, &y, &a, NULL );
//bl2_her2_unb_var3( &alpha, &x, &y, &a, NULL );
//bl2_her2_unb_var4( &alpha, &x, &y, &a, NULL );
//bl2_her2_unf_var4( &alpha, &x, &y, &a, NULL );
bl2_printm( "a after", &a, "%4.1f", "" );
// Only the parent objects xK and yK are freed; x and y are not freed
// separately.
bl2_obj_free( &xK );
bl2_obj_free( &yK );
bl2_obj_free( &a );
bl2_obj_free( &alpha );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_trmv().
// Creates an m x m (13 x 13) matrix a with explicit strides ( m, 1 ), a
// K x m (4 x 13) matrix xK and a 1x1 scalar alpha (set to 1.0). x is
// obtained from xK with bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, ... );
// presumably a single-row view used as a strided vector -- confirm.
// Operands are randomized, a is tagged BLIS_UPPER / BLIS_NO_TRANSPOSE /
// BLIS_NONUNIT_DIAG, then bl2_trmv( alpha, a, x ) is called and x is
// printed.
// When enabled, the trailing return leaves the enclosing function.
{
obj_t alpha, a, x, xK;
num_t dt_alpha, dt_a, dt_x;
dim_t m, K;
//dt_alpha = BLIS_DCOMPLEX;
//dt_a = BLIS_DCOMPLEX;
//dt_x = BLIS_DCOMPLEX;
dt_alpha = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_x = BLIS_DOUBLE;
m = 13;
K = 4;
bl2_init();
//bl2_obj_create( dt_a, m, m, 0, 0, &a );
bl2_obj_create( dt_a, m, m, m, 1, &a );
bl2_obj_create( dt_x, K, m, 0, 0, &xK );
// alpha uses its own datatype variable so that it can be toggled
// independently of x, as in the other drivers.
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &xK, &x );
// Operands are zeroed first and then overwritten with random values.
bl2_setm( &BLIS_ZERO, &a );
bl2_setm( &BLIS_ZERO, &x );
bl2_randm( &a );
bl2_randm( &x );
bl2_setsc( (1.0/1.0), &alpha );
bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_conjtrans( BLIS_NO_TRANSPOSE, a );
//bl2_obj_set_conjtrans( BLIS_TRANSPOSE, a );
bl2_obj_set_diag( BLIS_NONUNIT_DIAG, a );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "x", &x, "%4.1f", "" );
// Operation under test; the *_var* calls are alternative implementations.
bl2_trmv( &alpha, &a, &x );
//bl2_trmv_unb_var2( &alpha, &a, &x, NULL );
//bl2_trmv_unb_var1( &alpha, &a, &x, NULL );
//bl2_trmv_unf_var1( &alpha, &a, &x, NULL );
//bl2_trmv_unf_var2( &alpha, &a, &x, NULL );
bl2_printm( "x after", &x, "%4.1f", "" );
// Only the parent object xK is freed; x is not freed separately.
bl2_obj_free( &alpha );
bl2_obj_free( &a );
bl2_obj_free( &xK );
bl2_finalize();
return 0;
}
#endif
#if 0
// Disabled scratch driver exercising bl2_trsv().
// Creates an m x m (13 x 13) matrix a with explicit strides ( m, 1 ), a
// K x m (4 x 13) matrix xK and a 1x1 scalar alpha (set to 2.0). x is
// obtained from xK with bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, ... );
// presumably a single-row view used as a strided vector -- confirm.
// Operands are randomized, a is tagged BLIS_UPPER / BLIS_NO_TRANSPOSE /
// BLIS_NONUNIT_DIAG, then bl2_trsv( alpha, a, x ) is called and x is
// printed.
// When enabled, the trailing return leaves the enclosing function.
{
obj_t alpha, a, x, xK;
num_t dt_alpha, dt_a, dt_x;
dim_t m, K;
dt_alpha = BLIS_DOUBLE;
dt_a = BLIS_DOUBLE;
dt_x = BLIS_DOUBLE;
m = 13;
K = 4;
bl2_init();
//bl2_obj_create( dt_a, m, m, 0, 0, &a );
bl2_obj_create( dt_a, m, m, m, 1, &a );
bl2_obj_create( dt_x, K, m, 0, 0, &xK );
// alpha uses its own datatype variable so that it can be toggled
// independently of x, as in the other drivers.
bl2_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bl2_acquire_mpart_t2b( BLIS_SUBPART1, 0, 1, &xK, &x );
// Operands are zeroed first and then overwritten with random values.
bl2_setm( &BLIS_ZERO, &a );
bl2_setm( &BLIS_ZERO, &x );
bl2_randm( &a );
bl2_randm( &x );
bl2_setsc( (2.0/1.0), &alpha );
bl2_obj_set_uplo( BLIS_UPPER, a );
//bl2_obj_set_uplo( BLIS_LOWER, a );
bl2_obj_set_conjtrans( BLIS_NO_TRANSPOSE, a );
bl2_obj_set_diag( BLIS_NONUNIT_DIAG, a );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "x", &x, "%4.1f", "" );
// Operation under test; the *_var* calls are alternative implementations.
bl2_trsv( &alpha, &a, &x );
//bl2_trsv_unb_var1( &alpha, &a, &x, NULL );
//bl2_trsv_unb_var2( &alpha, &a, &x, NULL );
//bl2_trsv_unf_var1( &alpha, &a, &x, NULL );
//bl2_trsv_unf_var2( &alpha, &a, &x, NULL );
bl2_printm( "x after", &x, "%4.1f", "" );
// Only the parent object xK is freed; x is not freed separately.
bl2_obj_free( &alpha );
bl2_obj_free( &a );
bl2_obj_free( &xK );
bl2_finalize();
return 0;
}
#endif
// -- Level 3 ------------------------------------------------------------------
#if 0
// Disabled scratch driver exercising bl2_trmm().
// Creates an m x m (9 x 9) matrix a, an n x m (5 x 9) matrix b and a 1x1
// scalar alpha (set to 1.0), all BLIS_DCOMPLEX. Operands are randomized, a
// is tagged BLIS_LOWER / BLIS_TRANSPOSE / BLIS_NONUNIT_DIAG, then
// bl2_trmm( BLIS_RIGHT, alpha, a, b ) is called and b is printed.
// The commented-out m x n creation of b pairs with the commented-out
// BLIS_LEFT call; the *_var* calls are alternative implementations.
// When enabled, the trailing return leaves the enclosing function.
{
obj_t a, b;
obj_t alpha;
num_t dt_a, dt_b;
num_t dt_al;
dim_t m, n;
dt_a = BLIS_DCOMPLEX;
dt_b = BLIS_DCOMPLEX;
dt_al = BLIS_DCOMPLEX;
m = 9;
n = 5;
bl2_init();
bl2_obj_create( dt_a, m, m, 0, 0, &a );
//bl2_obj_create( dt_b, m, n, 0, 0, &b );
bl2_obj_create( dt_b, n, m, 0, 0, &b );
bl2_obj_create( dt_al, 1, 1, 0, 0, &alpha );
// Operands are zeroed first and then overwritten with random values.
bl2_setm( &BLIS_ZERO, &a );
bl2_setm( &BLIS_ZERO, &b );
bl2_randm( &a );
bl2_randm( &b );
//bl2_setsc( (2.0/1.0), &alpha );
bl2_setsc( (1.0/1.0), &alpha );
bl2_obj_set_uplo( BLIS_LOWER, a );
//bl2_obj_set_uplo( BLIS_UPPER, a );
bl2_obj_set_conjtrans( BLIS_TRANSPOSE, a );
bl2_obj_set_diag( BLIS_NONUNIT_DIAG, a );
bl2_printm( "a", &a, "%4.1f", "" );
bl2_printm( "b", &b, "%4.1f", "" );
//bl2_trmm_lu_unb_var3( &alpha, &a, &b, NULL );
//bl2_trmm_lu_unb_var2( &alpha, &a, &b, NULL );
//bl2_trmm_lu_unb_var1( &alpha, &a, &b, NULL );
//bl2_trmm_ll_unb_var3( &alpha, &a, &b, NULL );
//bl2_trmm_ll_unb_var2( &alpha, &a, &b, NULL );
//bl2_trmm_ll_unb_var1( &alpha, &a, &b, NULL );
//bl2_trmm( BLIS_LEFT, &alpha, &a, &b );
// Operation under test.
bl2_trmm( BLIS_RIGHT, &alpha, &a, &b );
bl2_printm( "b after", &b, "%4.1f", "" );
// Release every object created above, including alpha, before shutting the
// library down.
bl2_obj_free( &a );
bl2_obj_free( &b );
bl2_obj_free( &alpha );
bl2_finalize();
return 0;
}
#endif
}