From 020f80c30289d8bcaa688bf600b01fae9b23b54f Mon Sep 17 00:00:00 2001
From: Tyler Smith
Date: Tue, 11 Mar 2014 12:08:17 -0500
Subject: [PATCH] Added files specific to threading for gemm and packm operations

---
 frame/1m/packm/bli_packm_threading.c |  57 +++++++++
 frame/1m/packm/bli_packm_threading.h |  51 ++++++++
 frame/3/gemm/bli_gemm_threading.c    | 170 +++++++++++++++++++++++++++
 frame/3/gemm/bli_gemm_threading.h    |  73 ++++++++++++
 4 files changed, 351 insertions(+)
 create mode 100644 frame/1m/packm/bli_packm_threading.c
 create mode 100644 frame/1m/packm/bli_packm_threading.h
 create mode 100644 frame/3/gemm/bli_gemm_threading.c
 create mode 100644 frame/3/gemm/bli_gemm_threading.h

diff --git a/frame/1m/packm/bli_packm_threading.c b/frame/1m/packm/bli_packm_threading.c
new file mode 100644
index 000000000..0fa6b0bf2
--- /dev/null
+++ b/frame/1m/packm/bli_packm_threading.c
@@ -0,0 +1,57 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2014, The University of Texas
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name of The University of Texas nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "blis.h"
+// Typed wrapper: forwards every argument to bli_create_thread_info() and casts the result to packm_thrinfo_t*.
+packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id,
+                                               dim_t n_way, dim_t work_id )
+{
+    return (packm_thrinfo_t*) bli_create_thread_info( ocomm, ocomm_id, icomm, icomm_id, n_way, work_id );
+}
+// Typed wrapper: fills an existing node by passing it, cast to thrinfo_t*, to bli_setup_thread_info().
+void bli_setup_packm_thread_info( packm_thrinfo_t* thread, thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id,
+                                  dim_t n_way, dim_t work_id )
+{
+    bli_setup_thread_info( (thrinfo_t*) thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id );
+}
+// Single-threaded setup: both comms point at BLIS_SINGLE_COMM, both ids and work_id are 0, n_way is 1.
+void bli_setup_packm_single_threaded_info( packm_thrinfo_t* thread )
+{
+    thread->ocomm = &BLIS_SINGLE_COMM;
+    thread->ocomm_id = 0;
+    thread->icomm = &BLIS_SINGLE_COMM;
+    thread->icomm_id = 0;
+    thread->n_way = 1;
+    thread->work_id = 0;
+}
diff --git a/frame/1m/packm/bli_packm_threading.h b/frame/1m/packm/bli_packm_threading.h
new file mode 100644
index 000000000..12be0c9cd
--- /dev/null
+++ b/frame/1m/packm/bli_packm_threading.h
@@ -0,0 +1,51 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2014, The University of Texas
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+// Per-thread bookkeeping for packm. bli_packm_threading.c casts packm_thrinfo_t* to thrinfo_t*, so keep the field order.
+typedef struct packm_thrinfo_s //implements thrinfo_t
+{
+    thread_comm_t* ocomm;     //Outer thread communicator ("o" presumably = outer -- TODO confirm)
+    dim_t          ocomm_id;  //This thread's id within ocomm
+    thread_comm_t* icomm;     //Inner thread communicator ("i" presumably = inner -- TODO confirm)
+    dim_t          icomm_id;  //This thread's id within icomm
+
+    dim_t          n_way;     //Number of distinct caucuses used to parallelize the loop
+    dim_t          work_id;   //Which piece of the work this thread handles
+} packm_thrinfo_t;
+
+packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id,
+                                               dim_t n_way, dim_t work_id );
+void bli_setup_packm_thread_info( packm_thrinfo_t* thread, thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id,
+                                  dim_t n_way, dim_t work_id );
+void bli_setup_packm_single_threaded_info( packm_thrinfo_t* thread );
diff --git a/frame/3/gemm/bli_gemm_threading.c b/frame/3/gemm/bli_gemm_threading.c
new file mode 100644
index 000000000..53511fcc0
--- /dev/null
+++ b/frame/3/gemm/bli_gemm_threading.c
@@ -0,0 +1,170 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2014, The University of Texas
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name of The University of Texas nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "blis.h"
+#include "assert.h"
+// Initializes an already-allocated node: every argument is stored in the field of the same name.
+void bli_setup_gemm_thrinfo_node( gemm_thrinfo_t* thread,
+                                  thread_comm_t* ocomm, dim_t ocomm_id,
+                                  thread_comm_t* icomm, dim_t icomm_id,
+                                  dim_t n_way, dim_t work_id,
+                                  packm_thrinfo_t* opackm,
+                                  packm_thrinfo_t* ipackm,
+                                  gemm_thrinfo_t* sub_gemm )
+{
+    thread->ocomm = ocomm;
+    thread->ocomm_id = ocomm_id;
+    thread->icomm = icomm;
+    thread->icomm_id = icomm_id;
+    thread->n_way = n_way;
+    thread->work_id = work_id;
+    thread->opackm = opackm;
+    thread->ipackm = ipackm;
+    thread->sub_gemm = sub_gemm;
+}
+// Single-threaded node: comms are BLIS_SINGLE_COMM, packm infos are BLIS_PACKM_SINGLE_THREADED, sub_gemm is the node itself.
+void bli_setup_gemm_single_threaded_info( gemm_thrinfo_t* thread )
+{
+    thread->ocomm = &BLIS_SINGLE_COMM;
+    thread->ocomm_id = 0;
+    thread->icomm = &BLIS_SINGLE_COMM;
+    thread->icomm_id = 0;
+    thread->n_way = 1;
+    thread->work_id = 0;
+    thread->opackm = &BLIS_PACKM_SINGLE_THREADED;
+    thread->ipackm = &BLIS_PACKM_SINGLE_THREADED;
+    thread->sub_gemm = thread;
+}
+// Allocates a node with bli_malloc() and fills it via bli_setup_gemm_thrinfo_node(); the pointer is used unchecked.
+gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_id,
+                                              thread_comm_t* icomm, dim_t icomm_id,
+                                              dim_t n_way, dim_t work_id,
+                                              packm_thrinfo_t* opackm,
+                                              packm_thrinfo_t* ipackm,
+                                              gemm_thrinfo_t* sub_gemm )
+{
+    gemm_thrinfo_t* thread = ( gemm_thrinfo_t* ) bli_malloc( sizeof( gemm_thrinfo_t ) );
+    bli_setup_gemm_thrinfo_node( thread, ocomm, ocomm_id,
+                                 icomm, icomm_id,
+                                 n_way, work_id,
+                                 opackm,
+                                 ipackm,
+                                 sub_gemm );
+    return thread;
+}
+// Builds one chain of nodes (jc -> kc -> ic -> jr -> ir, linked through sub_gemm) for every thread.
+gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_levels )
+{
+    // Returns a malloc()'d array of the jc-level nodes indexed by global thread id, or NULL if that malloc fails.
+    assert(n_levels == 5);
+    // threads_at_level[] holds the ways of parallelism of the five loops in the order jc, kc, ic, jr, ir.
+    dim_t jc_way = threads_at_level[0];
+    dim_t kc_way = threads_at_level[1];
+    dim_t ic_way = threads_at_level[2];
+    dim_t jr_way = threads_at_level[3];
+    dim_t ir_way = threads_at_level[4];
+    // x_nt = number of threads inside one iteration of loop x: the product of the ways of the loops nested in it.
+    dim_t global_num_threads = jc_way * kc_way * ic_way * jr_way * ir_way;
+    dim_t jc_nt = kc_way * ic_way * jr_way * ir_way;
+    dim_t kc_nt = ic_way * jr_way * ir_way;
+    dim_t ic_nt = jr_way * ir_way;
+    dim_t jr_nt = ir_way;
+    dim_t ir_nt = 1;
+
+    gemm_thrinfo_t* paths = (gemm_thrinfo_t*) malloc( global_num_threads * sizeof( gemm_thrinfo_t ) );
+    if( paths == NULL ) return NULL; // nothing else has been allocated yet, so there is nothing to release
+    thread_comm_t* global_comm = bli_create_communicator( global_num_threads );
+    for( dim_t a = 0; a < jc_way; a++ ) // every loop runs over its *ways*; x_nt is only the size of the comm it creates
+    {
+        thread_comm_t* jc_comm = bli_create_communicator( jc_nt );
+        for( dim_t b = 0; b < kc_way; b++ )
+        {
+            thread_comm_t* kc_comm = bli_create_communicator( kc_nt );
+            for( dim_t c = 0; c < ic_way; c++ )
+            {
+                thread_comm_t* ic_comm = bli_create_communicator( ic_nt );
+                for( dim_t d = 0; d < jr_way; d++ )
+                {
+                    thread_comm_t* jr_comm = bli_create_communicator( jr_nt );
+                    for( dim_t e = 0; e < ir_way; e++ )
+                    {
+                        thread_comm_t* ir_comm = bli_create_communicator( ir_nt );
+                        // Id inside each comm = position among the threads nested below that level (always < comm size).
+                        dim_t ir_comm_id = 0;
+                        dim_t jr_comm_id = e*ir_nt + ir_comm_id;
+                        dim_t ic_comm_id = d*jr_nt + jr_comm_id;
+                        dim_t kc_comm_id = c*ic_nt + ic_comm_id;
+                        dim_t jc_comm_id = b*kc_nt + kc_comm_id;
+                        dim_t global_comm_id = a*jc_nt + jc_comm_id;
+                        // Node arguments: enclosing level's comm + id, this level's comm + id, ways, loop index.
+                        gemm_thrinfo_t* ir_info = bli_create_gemm_thrinfo_node( jr_comm, jr_comm_id,
+                                                                                ir_comm, ir_comm_id,
+                                                                                ir_way, e,
+                                                                                NULL, NULL, NULL);
+
+                        gemm_thrinfo_t* jr_info = bli_create_gemm_thrinfo_node( ic_comm, ic_comm_id,
+                                                                                jr_comm, jr_comm_id,
+                                                                                jr_way, d,
+                                                                                NULL, NULL, ir_info);
+                        // packb/packa become ic_info's opackm/ipackm; n_way is the size of their outer comm, work_id the id in it.
+                        packm_thrinfo_t* packb = bli_create_packm_thread_info( kc_comm, kc_comm_id,
+                                                                               ic_comm, ic_comm_id,
+                                                                               kc_nt, kc_comm_id );
+
+                        packm_thrinfo_t* packa = bli_create_packm_thread_info( ic_comm, ic_comm_id,
+                                                                               jr_comm, jr_comm_id,
+                                                                               ic_nt, ic_comm_id );
+
+                        gemm_thrinfo_t* ic_info = bli_create_gemm_thrinfo_node( kc_comm, kc_comm_id,
+                                                                                ic_comm, ic_comm_id,
+                                                                                ic_way, c,
+                                                                                packb, packa, jr_info);
+
+                        gemm_thrinfo_t* kc_info = bli_create_gemm_thrinfo_node( jc_comm, jc_comm_id,
+                                                                                kc_comm, kc_comm_id,
+                                                                                kc_way, b,
+                                                                                NULL, NULL, ic_info);
+                        // The jc-level node lives in the returned array; it follows the same pattern as the levels below.
+                        gemm_thrinfo_t* jc_info = &paths[global_comm_id];
+                        bli_setup_gemm_thrinfo_node( jc_info, global_comm, global_comm_id,
+                                                     jc_comm, jc_comm_id,
+                                                     jc_way, a,
+                                                     NULL, NULL, kc_info);
+                    }
+                }
+            }
+        }
+    }
+    return paths;
+}
diff --git a/frame/3/gemm/bli_gemm_threading.h b/frame/3/gemm/bli_gemm_threading.h
new file mode 100644
index 000000000..784a4b9ef
--- /dev/null
+++ b/frame/3/gemm/bli_gemm_threading.h
@@ -0,0 +1,73 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2014, The University of Texas
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name of The University of Texas nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+// One node of a thread's gemm path: its role in one loop level plus a link (sub_gemm) to the node of the next level.
+struct gemm_thrinfo_s //implements thrinfo_t
+{
+    thread_comm_t*  ocomm;       //Outer communicator; bli_create_gemm_thrinfo_paths passes the enclosing level's comm
+    dim_t           ocomm_id;    //Our thread id within ocomm
+    thread_comm_t*  icomm;       //Inner communicator; bli_create_gemm_thrinfo_paths passes the comm created at this level
+    dim_t           icomm_id;    //Our thread id within icomm
+
+    dim_t           n_way;       //Number of distinct caucuses used to parallelize the loop
+    dim_t           work_id;     //What we're working on
+
+    packm_thrinfo_t* opackm;     //packm thread info, or NULL; the ic-level node gets its "packb" info here
+    packm_thrinfo_t* ipackm;     //packm thread info, or NULL; the ic-level node gets its "packa" info here
+    struct gemm_thrinfo_s* sub_gemm; //Next node of the path; NULL at the ir level, the node itself when single-threaded
+};
+typedef struct gemm_thrinfo_s gemm_thrinfo_t;
+// Field accessors; the argument is parenthesized so that expressions such as &node or p + 1 expand correctly.
+#define gemm_thread_sub_gemm( thread )   ( (thread)->sub_gemm )
+#define gemm_thread_sub_opackm( thread ) ( (thread)->opackm )
+#define gemm_thread_sub_ipackm( thread ) ( (thread)->ipackm )
+
+gemm_thrinfo_t* bli_create_gemm_thrinfo_paths( dim_t* threads_at_level, dim_t n_levels );
+
+void bli_setup_gemm_thrinfo_node( gemm_thrinfo_t* thread,
+                                  thread_comm_t* ocomm, dim_t ocomm_id,
+                                  thread_comm_t* icomm, dim_t icomm_id,
+                                  dim_t n_way, dim_t work_id,
+                                  packm_thrinfo_t* opackm,
+                                  packm_thrinfo_t* ipackm,
+                                  gemm_thrinfo_t* sub_gemm );
+
+gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_id,
+                                              thread_comm_t* icomm, dim_t icomm_id,
+                                              dim_t n_way, dim_t work_id,
+                                              packm_thrinfo_t* opackm,
+                                              packm_thrinfo_t* ipackm,
+                                              gemm_thrinfo_t* sub_gemm );
+
+void bli_setup_gemm_single_threaded_info( gemm_thrinfo_t* thread );