mirror of
https://github.com/amd/blis.git
synced 2026-05-05 06:51:11 +00:00
Details: - Defined two new functions in bli_init.c: bli_init_once() and bli_finalize_once(). Each is implemented with pthread_once(), which guarantees that, among the threads that pass in the same pthread_once_t data structure, exactly one thread will execute a user-defined function. (Thus, there is now a runtime dependency against libpthread even when multithreading is not enabled at configure-time.) - Added calls to bli_init_once() to top-level user APIs for all computational operations as well as many other functions in BLIS to all but guarantee that BLIS will self-initialize through the normal use of its functions. - Rewrote and simplified bli_init() and bli_finalize() and related functions. - Added -lpthread to LDFLAGS in common.mk. - Modified the bli_init_auto()/_finalize_auto() functions used by the BLAS compatibility layer to take and return no arguments. (The previous API that tracked whether BLIS was initialized, and then only finalized if it was initialized in the same function, was too cute by half and borderline useless because by default BLIS stays initialized when auto-initialized via the compatibility layer.) - Removed static variables that track initialization of the sub-APIs in bli_const.c, bli_error.c, bli_init.c, bli_memsys.c, bli_thread, and bli_ind.c. We don't need to track initialization at the sub-API level, especially now that BLIS can self-initialize. - Added a critical section around the changing of the error checking level in bli_error.c. - Deprecated bli_ind_oper_has_avail() as well as all functions bli_<opname>_ind_get_avail(), where <opname> is a level-3 operation name. These functions had no use cases within BLIS and likely none outside of BLIS. - Commented out calls to bli_init() and bli_finalize() in testsuite's main() function, and likewise for standalone test drivers in 'test' directory, so that self-initialization is exercised by default.
317 lines
10 KiB
C
317 lines
10 KiB
C
/*
|
|
|
|
BLIS
|
|
An object-based framework for developing high-performance BLAS-like
|
|
libraries.
|
|
|
|
Copyright (C) 2014, The University of Texas
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
- Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
- Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
- Neither the name of The University of Texas nor the names of its
|
|
contributors may be used to endorse or promote products derived
|
|
from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
#include <unistd.h>
|
|
#include "blis.h"
|
|
|
|
|
|
//#define PRINT
|
|
|
|
int main( int argc, char** argv )
|
|
{
|
|
//bli_init();
|
|
|
|
#if 0
|
|
obj_t a, b, c;
|
|
obj_t aa, bb, cc;
|
|
dim_t m, n, k;
|
|
num_t dt;
|
|
uplo_t uploa, uplob, uploc;
|
|
|
|
{
|
|
dt = BLIS_DOUBLE;
|
|
|
|
m = 6;
|
|
k = 6;
|
|
n = 6;
|
|
|
|
bli_obj_create( dt, m, k, 0, 0, &a );
|
|
bli_obj_create( dt, k, n, 0, 0, &b );
|
|
bli_obj_create( dt, m, n, 0, 0, &c );
|
|
|
|
uploa = BLIS_UPPER;
|
|
uploa = BLIS_LOWER;
|
|
bli_obj_set_struc( BLIS_TRIANGULAR, a );
|
|
bli_obj_set_uplo( uploa, a );
|
|
bli_obj_set_diag_offset( -2, a );
|
|
|
|
uplob = BLIS_UPPER;
|
|
uplob = BLIS_LOWER;
|
|
bli_obj_set_struc( BLIS_TRIANGULAR, b );
|
|
bli_obj_set_uplo( uplob, b );
|
|
bli_obj_set_diag_offset( -2, b );
|
|
|
|
uploc = BLIS_UPPER;
|
|
//uploc = BLIS_LOWER;
|
|
//uploc = BLIS_ZEROS;
|
|
//uploc = BLIS_DENSE;
|
|
bli_obj_set_struc( BLIS_HERMITIAN, c );
|
|
//bli_obj_set_struc( BLIS_TRIANGULAR, c );
|
|
bli_obj_set_uplo( uploc, c );
|
|
bli_obj_set_diag_offset( 1, c );
|
|
|
|
bli_obj_alias_to( a, aa ); (void)aa;
|
|
bli_obj_alias_to( b, bb ); (void)bb;
|
|
bli_obj_alias_to( c, cc ); (void)cc;
|
|
|
|
bli_randm( &a );
|
|
bli_randm( &b );
|
|
bli_randm( &c );
|
|
//bli_mkherm( &a );
|
|
//bli_mktrim( &a );
|
|
|
|
bli_prune_unref_mparts( &cc, BLIS_M,
|
|
&aa, BLIS_N );
|
|
|
|
bli_printm( "c orig", &c, "%4.1f", "" );
|
|
bli_printm( "c alias", &cc, "%4.1f", "" );
|
|
bli_printm( "a orig", &a, "%4.1f", "" );
|
|
bli_printm( "a alias", &aa, "%4.1f", "" );
|
|
//bli_obj_print( "a struct", &a );
|
|
}
|
|
#endif
|
|
|
|
dim_t p_begin, p_max, p_inc;
|
|
gint_t m_input, n_input;
|
|
char uploa_ch;
|
|
doff_t diagoffa;
|
|
dim_t bf;
|
|
dim_t n_way;
|
|
char part_dim_ch;
|
|
bool_t go_fwd;
|
|
char out_ch;
|
|
|
|
obj_t a;
|
|
blksz_t bfs;
|
|
|
|
thrinfo_t thrinfo;
|
|
dim_t m, n;
|
|
uplo_t uploa;
|
|
bool_t part_m_dim, part_n_dim;
|
|
bool_t go_bwd;
|
|
dim_t p;
|
|
num_t dt;
|
|
dim_t start, end;
|
|
|
|
dim_t width;
|
|
siz_t area;
|
|
|
|
gint_t t_begin, t_stop, t_inc;
|
|
dim_t t;
|
|
|
|
if ( argc == 13 )
|
|
{
|
|
sscanf( argv[1], "%lu", &p_begin );
|
|
sscanf( argv[2], "%lu", &p_max );
|
|
sscanf( argv[3], "%lu", &p_inc );
|
|
sscanf( argv[4], "%ld", &m_input );
|
|
sscanf( argv[5], "%ld", &n_input );
|
|
sscanf( argv[6], "%c", &uploa_ch );
|
|
sscanf( argv[7], "%ld", &diagoffa );
|
|
sscanf( argv[8], "%lu", &bf );
|
|
sscanf( argv[9], "%lu", &n_way );
|
|
sscanf( argv[10], "%c", &part_dim_ch );
|
|
sscanf( argv[11], "%lu", &go_fwd );
|
|
sscanf( argv[12], "%c", &out_ch );
|
|
}
|
|
else
|
|
{
|
|
printf( "\n" );
|
|
printf( " %s\n", argv[0] );
|
|
printf( "\n" );
|
|
printf( " Simulate the dimension ranges assigned to threads when\n" );
|
|
printf( " partitioning a matrix for parallelism in BLIS.\n" );
|
|
printf( "\n" );
|
|
printf( " Usage:\n" );
|
|
printf( "\n" );
|
|
printf( " %s p_beg p_max p_inc m n uplo doff bf n_way part_dim go_fwd out\n", argv[0] );
|
|
printf( "\n" );
|
|
printf( " p_beg: the first problem size p to test.\n" );
|
|
printf( " p_max: the maximum problem size p to test.\n" );
|
|
printf( " p_inc: the increase in problem size p between tests.\n" );
|
|
printf( " m: the m dimension:\n" );
|
|
printf( " n: the n dimension:\n" );
|
|
printf( " if m,n = -1: bind m,n to problem size p.\n" );
|
|
printf( " if m,n = 0: bind m,n to p_max.\n" );
|
|
printf( " if m,n > 0: hold m,n = c constant for all p.\n" );
|
|
printf( " uplo: the uplo field of the matrix being partitioned:\n" );
|
|
printf( " 'l': lower-stored (BLIS_LOWER)\n" );
|
|
printf( " 'u': upper-stored (BLIS_UPPER)\n" );
|
|
printf( " 'd': densely-stored (BLIS_DENSE)\n" );
|
|
printf( " doff: the diagonal offset of the matrix being partitioned.\n" );
|
|
printf( " bf: the simulated blocking factor. all thread ranges must\n" );
|
|
printf( " be a multiple of bf, except for the range that contains\n" );
|
|
printf( " the edge case (if one exists). the blocking factor\n" );
|
|
printf( " would typically correspond to a register blocksize.\n" );
|
|
printf( " n_way: the number of ways of parallelism for which we are\n" );
|
|
printf( " partitioning (i.e.: the number of threads, or thread\n" );
|
|
printf( " groups).\n" );
|
|
printf( " part_dim: the dimension to partition:\n" );
|
|
printf( " 'm': partition the m dimension.\n" );
|
|
printf( " 'n': partition the n dimension.\n" );
|
|
printf( " go_fwd: the direction to partition:\n" );
|
|
printf( " '1': forward, e.g. left-to-right (part_dim = 'm') or\n" );
|
|
printf( " top-to-bottom (part_dim = 'n')\n" );
|
|
printf( " '0': backward, e.g. right-to-left (part_dim = 'm') or\n" );
|
|
printf( " bottom-to-top (part_dim = 'n')\n" );
|
|
printf( " NOTE: reversing the direction does not change the\n" );
|
|
printf( " subpartitions' widths, but it does change which end of\n" );
|
|
printf( " the index range receives the edge case, if it exists.\n" );
|
|
printf( " out: the type of output per thread-column:\n" );
|
|
printf( " 'w': the width (and area) of the thread's subpartition\n" );
|
|
printf( " 'r': the actual ranges of the thread's subpartition\n" );
|
|
printf( " where the start and end points of each range are\n" );
|
|
printf( " inclusive and exclusive, respectively.\n" );
|
|
printf( "\n" );
|
|
|
|
exit(1);
|
|
}
|
|
|
|
if ( m_input == 0 ) m_input = p_max;
|
|
if ( n_input == 0 ) n_input = p_max;
|
|
|
|
if ( part_dim_ch == 'm' ) { part_m_dim = TRUE; part_n_dim = FALSE; }
|
|
else { part_m_dim = FALSE; part_n_dim = TRUE; }
|
|
|
|
go_bwd = !go_fwd;
|
|
|
|
if ( uploa_ch == 'l' ) uploa = BLIS_LOWER;
|
|
else if ( uploa_ch == 'u' ) uploa = BLIS_UPPER;
|
|
else uploa = BLIS_DENSE;
|
|
|
|
if ( part_n_dim )
|
|
{
|
|
if ( bli_is_upper( uploa ) ) { t_begin = n_way-1; t_stop = -1; t_inc = -1; }
|
|
else /* if lower or dense */ { t_begin = 0; t_stop = n_way; t_inc = 1; }
|
|
}
|
|
else // if ( part_m_dim )
|
|
{
|
|
if ( bli_is_lower( uploa ) ) { t_begin = n_way-1; t_stop = -1; t_inc = -1; }
|
|
else /* if upper or dense */ { t_begin = 0; t_stop = n_way; t_inc = 1; }
|
|
}
|
|
|
|
printf( "\n" );
|
|
printf( " part: %3s doff: %3ld bf: %3ld output: %s\n",
|
|
( part_n_dim ? ( go_fwd ? "l2r" : "r2l" )
|
|
: ( go_fwd ? "t2b" : "b2t" ) ),
|
|
diagoffa, bf,
|
|
( out_ch == 'w' ? "width(area)" : "ranges" ) );
|
|
printf( " uplo: %3c nt: %3ld\n", uploa_ch, n_way );
|
|
printf( "\n" );
|
|
|
|
printf( " " );
|
|
for ( t = t_begin; t != t_stop; t += t_inc )
|
|
{
|
|
if ( part_n_dim )
|
|
{
|
|
if ( t == t_begin ) printf( "left... " );
|
|
else if ( t == t_stop-t_inc ) printf( " ...right" );
|
|
else printf( " " );
|
|
}
|
|
else // if ( part_m_dim )
|
|
{
|
|
if ( t == t_begin ) printf( "top... " );
|
|
else if ( t == t_stop-t_inc ) printf( " ...bottom" );
|
|
else printf( " " );
|
|
}
|
|
}
|
|
printf( "\n" );
|
|
|
|
|
|
printf( "%4c x %4c ", 'm', 'n' );
|
|
for ( t = t_begin; t != t_stop; t += t_inc )
|
|
{
|
|
printf( "%9s %lu ", "thread", t );
|
|
}
|
|
printf( "\n" );
|
|
printf( "-------------" );
|
|
for ( t = t_begin; t != t_stop; t += t_inc )
|
|
{
|
|
printf( "-------------" );
|
|
}
|
|
printf( "\n" );
|
|
|
|
|
|
for ( p = p_begin; p <= p_max; p += p_inc )
|
|
{
|
|
if ( m_input < 0 ) m = ( dim_t )p;
|
|
else m = ( dim_t )m_input;
|
|
if ( n_input < 0 ) n = ( dim_t )p;
|
|
else n = ( dim_t )n_input;
|
|
|
|
dt = BLIS_DOUBLE;
|
|
|
|
bli_obj_create( dt, m, n, 0, 0, &a );
|
|
|
|
bli_obj_set_struc( BLIS_TRIANGULAR, a );
|
|
bli_obj_set_uplo( uploa, a );
|
|
bli_obj_set_diag_offset( diagoffa, a );
|
|
|
|
bli_randm( &a );
|
|
|
|
bli_blksz_init_easy( &bfs, bf, bf, bf, bf );
|
|
|
|
printf( "%4lu x %4lu ", m, n );
|
|
|
|
for ( t = t_begin; t != t_stop; t += t_inc )
|
|
{
|
|
thrinfo.n_way = n_way;
|
|
thrinfo.work_id = t;
|
|
|
|
if ( part_n_dim && go_fwd )
|
|
area = bli_thread_get_range_weighted_l2r( &thrinfo, &a, &bfs, &start, &end );
|
|
else if ( part_n_dim && go_bwd )
|
|
area = bli_thread_get_range_weighted_r2l( &thrinfo, &a, &bfs, &start, &end );
|
|
else if ( part_m_dim && go_fwd )
|
|
area = bli_thread_get_range_weighted_t2b( &thrinfo, &a, &bfs, &start, &end );
|
|
else // ( part_m_dim && go_bwd )
|
|
area = bli_thread_get_range_weighted_b2t( &thrinfo, &a, &bfs, &start, &end );
|
|
|
|
width = end - start;
|
|
|
|
if ( out_ch == 'w' ) printf( "%4lu(%6lu) ", width, area );
|
|
else printf( "[%4lu,%4lu) ", start, end );
|
|
}
|
|
|
|
printf( "\n" );
|
|
|
|
bli_obj_free( &a );
|
|
}
|
|
|
|
//bli_finalize();
|
|
|
|
return 0;
|
|
}
|
|
|