mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Added files specific to threading for gemm and packm operations
This commit is contained in:
57
frame/1m/packm/bli_packm_threading.c
Normal file
57
frame/1m/packm/bli_packm_threading.c
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
/*
 * Creates a packm thread-info object.
 *
 * All six arguments are forwarded unchanged to the generic
 * bli_create_thread_info(); the pointer it returns is handed back viewed as
 * a packm_thrinfo_t*.
 */
packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm, dim_t ocomm_id,
                                               thread_comm_t* icomm, dim_t icomm_id,
                                               dim_t n_way, dim_t work_id )
{
    packm_thrinfo_t* info;

    info = ( packm_thrinfo_t* ) bli_create_thread_info( ocomm, ocomm_id,
                                                        icomm, icomm_id,
                                                        n_way, work_id );

    return info;
}
|
||||
|
||||
/*
 * Fills in an already-existing packm thread-info object.
 *
 * The object is passed to the generic bli_setup_thread_info() as a
 * thrinfo_t*, together with the communicators, ids, n_way and work_id given
 * by the caller.
 */
void bli_setup_packm_thread_info( packm_thrinfo_t* thread,
                                  thread_comm_t* ocomm, dim_t ocomm_id,
                                  thread_comm_t* icomm, dim_t icomm_id,
                                  dim_t n_way, dim_t work_id )
{
    thrinfo_t* generic = ( thrinfo_t* ) thread;

    bli_setup_thread_info( generic,
                           ocomm, ocomm_id,
                           icomm, icomm_id,
                           n_way, work_id );
}
|
||||
|
||||
/*
 * Initializes a packm thread-info object for single-threaded execution:
 * both communicators point at BLIS_SINGLE_COMM, both communicator ids and
 * the work id are 0, and the loop is split one way.
 */
void bli_setup_packm_single_threaded_info( packm_thrinfo_t* thread )
{
    /* Outer and inner communicators are the same single-thread communicator. */
    thread->ocomm    = &BLIS_SINGLE_COMM;
    thread->icomm    = &BLIS_SINGLE_COMM;

    /* The only thread is thread 0 in both of them. */
    thread->ocomm_id = 0;
    thread->icomm_id = 0;

    /* One way of parallelism; this thread owns work item 0. */
    thread->n_way    = 1;
    thread->work_id  = 0;
}
|
||||
51
frame/1m/packm/bli_packm_threading.h
Normal file
51
frame/1m/packm/bli_packm_threading.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
struct packm_thrinfo_s //implements thrinfo_t
|
||||
{
|
||||
thread_comm_t* ocomm; //The thread communicator for the other threads sharing the same work at this level
|
||||
dim_t ocomm_id; //Our thread id within that thread comm
|
||||
thread_comm_t* icomm; //The thread communicator for the other threads sharing the same work at this level
|
||||
dim_t icomm_id; //Our thread id within that thread comm
|
||||
|
||||
dim_t n_way; //Number of distinct caucuses used to parallelize the loop
|
||||
dim_t work_id; //What we're working on
|
||||
};
|
||||
typedef struct packm_thrinfo_s packm_thrinfo_t;
|
||||
|
||||
/* Creates a packm thread-info object from the given communicators, ids,
   n_way and work_id, and returns a pointer to it. */
packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm,
                                               dim_t          ocomm_id,
                                               thread_comm_t* icomm,
                                               dim_t          icomm_id,
                                               dim_t          n_way,
                                               dim_t          work_id );

/* Fills in a caller-provided packm thread-info object with the given
   communicators, ids, n_way and work_id. */
void bli_setup_packm_thread_info( packm_thrinfo_t* thread,
                                  thread_comm_t*   ocomm,
                                  dim_t            ocomm_id,
                                  thread_comm_t*   icomm,
                                  dim_t            icomm_id,
                                  dim_t            n_way,
                                  dim_t            work_id );

/* Fills in a caller-provided packm thread-info object for single-threaded
   execution. */
void bli_setup_packm_single_threaded_info( packm_thrinfo_t* thread );
|
||||
170
frame/3/gemm/bli_gemm_threading.c
Normal file
170
frame/3/gemm/bli_gemm_threading.c
Normal file
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
#include "assert.h"
|
||||
|
||||
void bli_setup_gemm_thrinfo_node( gemm_thrinfo_t* thread,
|
||||
thread_comm_t* ocomm, dim_t ocomm_id,
|
||||
thread_comm_t* icomm, dim_t icomm_id,
|
||||
dim_t n_way, dim_t work_id,
|
||||
packm_thrinfo_t* opackm,
|
||||
packm_thrinfo_t* ipackm,
|
||||
gemm_thrinfo_t* sub_gemm )
|
||||
{
|
||||
thread->ocomm = ocomm;
|
||||
thread->ocomm_id = ocomm_id;
|
||||
thread->icomm = icomm;
|
||||
thread->icomm_id = icomm_id;
|
||||
thread->n_way = n_way;
|
||||
thread->work_id = work_id;
|
||||
thread->opackm = opackm;
|
||||
thread->ipackm = ipackm;
|
||||
thread->sub_gemm = sub_gemm;
|
||||
}
|
||||
|
||||
/*
 * Initializes a gemm thread-info node for single-threaded execution.
 *
 * Both communicators are BLIS_SINGLE_COMM with id 0, the loop is split one
 * way with work id 0, both packm infos are BLIS_PACKM_SINGLE_THREADED, and
 * sub_gemm points back at the node itself, so following sub_gemm from this
 * node always yields the same node.
 */
void bli_setup_gemm_single_threaded_info( gemm_thrinfo_t* thread )
{
    /* Communicators: the single-thread communicator at both levels. */
    thread->ocomm    = &BLIS_SINGLE_COMM;
    thread->icomm    = &BLIS_SINGLE_COMM;
    thread->ocomm_id = 0;
    thread->icomm_id = 0;

    /* One way of parallelism; this thread owns work item 0. */
    thread->n_way    = 1;
    thread->work_id  = 0;

    /* Packing is single-threaded as well. */
    thread->opackm   = &BLIS_PACKM_SINGLE_THREADED;
    thread->ipackm   = &BLIS_PACKM_SINGLE_THREADED;

    /* Self-reference: the node serves as its own child. */
    thread->sub_gemm = thread;
}
|
||||
|
||||
gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_id,
|
||||
thread_comm_t* icomm, dim_t icomm_id,
|
||||
dim_t n_way, dim_t work_id,
|
||||
packm_thrinfo_t* opackm,
|
||||
packm_thrinfo_t* ipackm,
|
||||
gemm_thrinfo_t* sub_gemm )
|
||||
{
|
||||
gemm_thrinfo_t* thread = ( gemm_thrinfo_t* ) bli_malloc( sizeof( gemm_thrinfo_t ) );
|
||||
bli_setup_gemm_thrinfo_node( thread, ocomm, ocomm_id,
|
||||
icomm, icomm_id,
|
||||
n_way, work_id,
|
||||
opackm,
|
||||
ipackm,
|
||||
sub_gemm );
|
||||
return thread;
|
||||
}
|
||||
|
||||
/*
 * Builds one thread-info path per thread for the five nested gemm loops
 * (jc, kc, ic, jr, ir).
 *
 * threads_at_level: array of n_levels entries holding the ways of
 *                   parallelism of each loop, ordered { jc, kc, ic, jr, ir }.
 * n_levels:         must be 5 (asserted).
 *
 * Returns a malloc()'d array of jc_way*kc_way*ic_way*jr_way*ir_way root (jc)
 * nodes indexed by global thread id. Each root is chained through sub_gemm
 * to separately allocated kc, ic, jr and ir nodes; the ic node carries the
 * two packm thread infos. Returns NULL if the array cannot be allocated.
 */
gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_levels )
{
    assert( n_levels == 5 );

    /* Ways of parallelism requested for each loop, outermost first. */
    dim_t jc_way = threads_at_level[0];
    dim_t kc_way = threads_at_level[1];
    dim_t ic_way = threads_at_level[2];
    dim_t jr_way = threads_at_level[3];
    dim_t ir_way = threads_at_level[4];

    dim_t global_num_threads = jc_way * kc_way * ic_way * jr_way * ir_way;

    /* Number of threads that cooperate on a single iteration of each loop,
       i.e. the product of the ways of all loops nested inside it. */
    dim_t jc_nt = kc_way * ic_way * jr_way * ir_way;
    dim_t kc_nt = ic_way * jr_way * ir_way;
    dim_t ic_nt = jr_way * ir_way;
    dim_t jr_nt = ir_way;
    dim_t ir_nt = 1;

    gemm_thrinfo_t* paths = (gemm_thrinfo_t*) malloc( global_num_threads * sizeof( gemm_thrinfo_t ) );
    if ( paths == NULL )
        return NULL;

    thread_comm_t* global_comm = bli_create_communicator( global_num_threads );

    /* Each loop runs over its own ways of parallelism (not over the
       per-iteration thread count), so global_comm_id below takes every value
       in [0, global_num_threads) exactly once and never indexes past the end
       of paths. One communicator is created per iteration of each loop and
       shared by all threads nested inside that iteration. */
    for( dim_t a = 0; a < jc_way; a++ )
    {
        thread_comm_t* jc_comm = bli_create_communicator( jc_nt );
        for( dim_t b = 0; b < kc_way; b++ )
        {
            thread_comm_t* kc_comm = bli_create_communicator( kc_nt );
            for( dim_t c = 0; c < ic_way; c++ )
            {
                thread_comm_t* ic_comm = bli_create_communicator( ic_nt );
                for( dim_t d = 0; d < jr_way; d++ )
                {
                    thread_comm_t* jr_comm = bli_create_communicator( jr_nt );
                    for( dim_t e = 0; e < ir_way; e++ )
                    {
                        thread_comm_t* ir_comm = bli_create_communicator( ir_nt );

                        /* This thread's id within the communicator of each
                           level, built up from the innermost level outwards. */
                        dim_t ir_comm_id     = 0;
                        dim_t jr_comm_id     = e*ir_nt + ir_comm_id;
                        dim_t ic_comm_id     = d*jr_nt + jr_comm_id;
                        dim_t kc_comm_id     = c*ic_nt + ic_comm_id;
                        dim_t jc_comm_id     = b*kc_nt + kc_comm_id;
                        dim_t global_comm_id = a*jc_nt + jc_comm_id;

                        /* Every node is set up as: ( enclosing level's comm
                           and id, own level's comm and id, own ways, own loop
                           index ). */
                        gemm_thrinfo_t* ir_info = bli_create_gemm_thrinfo_node( jr_comm, jr_comm_id,
                                                                                ir_comm, ir_comm_id,
                                                                                ir_way, e,
                                                                                NULL, NULL, NULL );

                        gemm_thrinfo_t* jr_info = bli_create_gemm_thrinfo_node( ic_comm, ic_comm_id,
                                                                                jr_comm, jr_comm_id,
                                                                                jr_way, d,
                                                                                NULL, NULL, ir_info );

                        packm_thrinfo_t* packb = bli_create_packm_thread_info( kc_comm, kc_comm_id,
                                                                               ic_comm, ic_comm_id,
                                                                               kc_nt, kc_comm_id );

                        packm_thrinfo_t* packa = bli_create_packm_thread_info( ic_comm, ic_comm_id,
                                                                               jr_comm, jr_comm_id,
                                                                               ic_nt, ic_comm_id );

                        gemm_thrinfo_t* ic_info = bli_create_gemm_thrinfo_node( kc_comm, kc_comm_id,
                                                                                ic_comm, ic_comm_id,
                                                                                ic_way, c,
                                                                                packb, packa, jr_info );

                        gemm_thrinfo_t* kc_info = bli_create_gemm_thrinfo_node( jc_comm, jc_comm_id,
                                                                                kc_comm, kc_comm_id,
                                                                                kc_way, b,
                                                                                NULL, NULL, ic_info );

                        /* The root node lives directly in the returned array
                           and describes the jc loop, so it takes the jc
                           communicator, id and ways. */
                        gemm_thrinfo_t* jc_info = &paths[global_comm_id];
                        bli_setup_gemm_thrinfo_node( jc_info, global_comm, global_comm_id,
                                                     jc_comm, jc_comm_id,
                                                     jc_way, a,
                                                     NULL, NULL, kc_info );
                    }
                }
            }
        }
    }

    return paths;
}
|
||||
73
frame/3/gemm/bli_gemm_threading.h
Normal file
73
frame/3/gemm/bli_gemm_threading.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
struct gemm_thrinfo_s //implements thrinfo_t
|
||||
{
|
||||
thread_comm_t* ocomm; //The thread communicator for the other threads sharing the same work at this level
|
||||
dim_t ocomm_id; //Our thread id within that thread comm
|
||||
thread_comm_t* icomm; //The thread communicator for the other threads sharing the same work at this level
|
||||
dim_t icomm_id; //Our thread id within that thread comm
|
||||
|
||||
dim_t n_way; //Number of distinct caucuses used to parallelize the loop
|
||||
dim_t work_id; //What we're working on
|
||||
|
||||
packm_thrinfo_t* opackm;
|
||||
packm_thrinfo_t* ipackm;
|
||||
struct gemm_thrinfo_s* sub_gemm;
|
||||
};
|
||||
typedef struct gemm_thrinfo_s gemm_thrinfo_t;
|
||||
|
||||
/*
 * Accessors for the links stored in a gemm thread-info node. `thread` is a
 * pointer expression. The argument and the whole expansion are
 * parenthesized so that arguments such as `&node` or `p + 1` bind to `->`
 * as intended, and so the result can be used inside larger expressions.
 */
#define gemm_thread_sub_gemm( thread )   ( (thread)->sub_gemm )
#define gemm_thread_sub_opackm( thread ) ( (thread)->opackm )
#define gemm_thread_sub_ipackm( thread ) ( (thread)->ipackm )
|
||||
|
||||
gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_levels );
|
||||
|
||||
void bli_setup_gemm_thrinfo_node( gemm_thrinfo_t* thread,
|
||||
thread_comm_t* ocomm, dim_t ocomm_id,
|
||||
thread_comm_t* icomm, dim_t icomm_id,
|
||||
dim_t n_way, dim_t work_id,
|
||||
packm_thrinfo_t* opackm,
|
||||
packm_thrinfo_t* ipackm,
|
||||
gemm_thrinfo_t* sub_gemm );
|
||||
|
||||
gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_id,
|
||||
thread_comm_t* icomm, dim_t icomm_id,
|
||||
dim_t n_way, dim_t work_id,
|
||||
packm_thrinfo_t* opackm,
|
||||
packm_thrinfo_t* ipackm,
|
||||
gemm_thrinfo_t* sub_gemm );
|
||||
|
||||
void bli_setup_gemm_single_threaded_info( gemm_thrinfo_t* thread );
|
||||
Reference in New Issue
Block a user