diff --git a/frame/thread/bli_l3_sup_decor.h b/frame/thread/bli_l3_sup_decor.h index 8069463b6..01dfb8f6e 100644 --- a/frame/thread/bli_l3_sup_decor.h +++ b/frame/thread/bli_l3_sup_decor.h @@ -71,8 +71,8 @@ err_t bli_l3_sup_thread_decorator // Include definitions specific to the method of multithreading for the // sup code path. #include "bli_l3_sup_decor_single.h" -//#include "bli_l3_sup_decor_openmp.h" -//#include "bli_l3_sup_decor_pthreads.h" +#include "bli_l3_sup_decor_openmp.h" +#include "bli_l3_sup_decor_pthreads.h" #endif diff --git a/frame/thread/bli_l3_sup_decor_openmp.c b/frame/thread/bli_l3_sup_decor_openmp.c new file mode 100644 index 000000000..7808c1333 --- /dev/null +++ b/frame/thread/bli_l3_sup_decor_openmp.c @@ -0,0 +1,190 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#ifdef BLIS_ENABLE_OPENMP + +// Define a dummy function bli_l3_sup_thread_entry(), which is needed in the +// pthreads version, so that when building Windows DLLs (with OpenMP enabled +// or no multithreading) we don't risk having an unresolved symbol. +//void* bli_l3_sup_thread_entry( void* data_void ) { return NULL; } + + + +err_t bli_l3_sup_thread_decorator + ( + l3supint_t func, + opid_t family, + //pack_t schema_a, + //pack_t schema_b, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c, + cntx_t* cntx, + rntm_t* rntm + ) +{ +#if 0 + + return + bli_gemmsup_int + ( + alpha, + a, + b, + beta, + c, + cntx, + rntm, + 0 + ); + +#else + + // This is part of a hack to support mixed domain in bli_gemm_front(). + // Sometimes we need to specify a non-standard schema for A and B, and + // we decided to transmit them via the schema field in the obj_t's + // rather than pass them in as function parameters. Once the values + // have been read, we immediately reset them back to their expected + // values for unpacked objects. + //pack_t schema_a = bli_obj_pack_schema( a ); + //pack_t schema_b = bli_obj_pack_schema( b ); + //bli_obj_set_pack_schema( BLIS_NOT_PACKED, a ); + //bli_obj_set_pack_schema( BLIS_NOT_PACKED, b ); + + // For sequential execution, we use only one thread. + const dim_t n_threads = 1; + + // NOTE: The sba was initialized in bli_init(). + + // Check out an array_t from the small block allocator. This is done + // with an internal lock to ensure only one application thread accesses + // the sba at a time. bli_sba_checkout_array() will also automatically + // resize the array_t, if necessary. + array_t* restrict array = bli_sba_checkout_array( n_threads ); + + // Access the pool_t* for thread 0 and embed it into the rntm. We do + // this up-front only so that we can create the global comm below. + bli_sba_rntm_set_pool( 0, array, rntm ); + + // Set the packing block allocator field of the rntm. + bli_membrk_rntm_set_membrk( rntm ); + +#if 0 + // Allcoate a global communicator for the root thrinfo_t structures. + thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads ); +#endif + + + { + // NOTE: We don't need to create another copy of the rntm_t since + // it was already copied in one of the high-level oapi functions. + rntm_t* restrict rntm_p = rntm; + + cntl_t* cntl_use = NULL; + //thrinfo_t* thread = NULL; + thrinfo_t* thread = &BLIS_PACKM_SINGLE_THREADED; + + const dim_t tid = 0; + + // Use the thread id to access the appropriate pool_t* within the + // array_t, and use it to set the sba_pool field within the rntm_t. + // If the pool_t* element within the array_t is NULL, it will first + // be allocated/initialized. + // NOTE: This is commented out because, in the single-threaded case, + // this is redundant since it's already been done above. + //bli_sba_rntm_set_pool( tid, array, rntm_p ); + + // NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't + // need to alias objects for A, B, and C since they were already aliased + // in bli_*_front(). However, we may add aliasing here in the future so + // that, with all three (_single.c, _openmp.c, _pthreads.c) implementations + // consistently providing local aliases, we can then eliminate aliasing + // elsewhere. + + // Create a default control tree for the operation, if needed. + //bli_l3_cntl_create_if( family, schema_a, schema_b, + // a, b, c, rntm_p, cntl, &cntl_use ); +#if 0 + cntl_use = bli_gemm_cntl_create( rntm_p, family, schema_a, schema_b ); + + // Create the root node of the thread's thrinfo_t structure. + bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread ); +#endif + + ( void )tid; + + func + ( + alpha, + a, + b, + beta, + c, + cntx, + rntm_p, + cntl_use, + thread + ); + +#if 0 + // Free the thread's local control tree. + //bli_l3_cntl_free( rntm_p, cntl_use, thread ); + bli_gemm_cntl_free( rntm_p, cntl_use, thread ); + + // Free the current thread's thrinfo_t structure. + bli_l3_thrinfo_free( rntm_p, thread ); +#endif + } + + // We shouldn't free the global communicator since it was already freed + // by the global communicator's chief thread in bli_l3_thrinfo_free() + // (called above). + + // Check the array_t back into the small block allocator. Similar to the + // check-out, this is done using a lock embedded within the sba to ensure + // mutual exclusion. + bli_sba_checkin_array( array ); + + return BLIS_SUCCESS; + +#endif +} + +#endif + diff --git a/frame/thread/bli_l3_sup_decor_openmp.h b/frame/thread/bli_l3_sup_decor_openmp.h new file mode 100644 index 000000000..1d1097a82 --- /dev/null +++ b/frame/thread/bli_l3_sup_decor_openmp.h @@ -0,0 +1,44 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_L3_SUP_DECOR_OPENMP_H +#define BLIS_L3_SUP_DECOR_OPENMP_H + +// Definitions specific to situations when OpenMP multithreading is enabled. +#ifdef BLIS_ENABLE_OPENMP + +#endif + +#endif + diff --git a/frame/thread/bli_l3_sup_decor_pthreads.c b/frame/thread/bli_l3_sup_decor_pthreads.c new file mode 100644 index 000000000..7cbf65381 --- /dev/null +++ b/frame/thread/bli_l3_sup_decor_pthreads.c @@ -0,0 +1,183 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#ifdef BLIS_ENABLE_PTHREADS + +err_t bli_l3_sup_thread_decorator + ( + l3supint_t func, + opid_t family, + //pack_t schema_a, + //pack_t schema_b, + obj_t* alpha, + obj_t* a, + obj_t* b, + obj_t* beta, + obj_t* c, + cntx_t* cntx, + rntm_t* rntm + ) +{ +#if 0 + + return + bli_gemmsup_int + ( + alpha, + a, + b, + beta, + c, + cntx, + rntm, + 0 + ); + +#else + + // This is part of a hack to support mixed domain in bli_gemm_front(). + // Sometimes we need to specify a non-standard schema for A and B, and + // we decided to transmit them via the schema field in the obj_t's + // rather than pass them in as function parameters. Once the values + // have been read, we immediately reset them back to their expected + // values for unpacked objects. + //pack_t schema_a = bli_obj_pack_schema( a ); + //pack_t schema_b = bli_obj_pack_schema( b ); + //bli_obj_set_pack_schema( BLIS_NOT_PACKED, a ); + //bli_obj_set_pack_schema( BLIS_NOT_PACKED, b ); + + // For sequential execution, we use only one thread. + const dim_t n_threads = 1; + + // NOTE: The sba was initialized in bli_init(). + + // Check out an array_t from the small block allocator. This is done + // with an internal lock to ensure only one application thread accesses + // the sba at a time. bli_sba_checkout_array() will also automatically + // resize the array_t, if necessary. + array_t* restrict array = bli_sba_checkout_array( n_threads ); + + // Access the pool_t* for thread 0 and embed it into the rntm. We do + // this up-front only so that we can create the global comm below. + bli_sba_rntm_set_pool( 0, array, rntm ); + + // Set the packing block allocator field of the rntm. + bli_membrk_rntm_set_membrk( rntm ); + +#if 0 + // Allcoate a global communicator for the root thrinfo_t structures. + thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads ); +#endif + + + { + // NOTE: We don't need to create another copy of the rntm_t since + // it was already copied in one of the high-level oapi functions. + rntm_t* restrict rntm_p = rntm; + + cntl_t* cntl_use = NULL; + //thrinfo_t* thread = NULL; + thrinfo_t* thread = &BLIS_PACKM_SINGLE_THREADED; + + const dim_t tid = 0; + + // Use the thread id to access the appropriate pool_t* within the + // array_t, and use it to set the sba_pool field within the rntm_t. + // If the pool_t* element within the array_t is NULL, it will first + // be allocated/initialized. + // NOTE: This is commented out because, in the single-threaded case, + // this is redundant since it's already been done above. + //bli_sba_rntm_set_pool( tid, array, rntm_p ); + + // NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't + // need to alias objects for A, B, and C since they were already aliased + // in bli_*_front(). However, we may add aliasing here in the future so + // that, with all three (_single.c, _openmp.c, _pthreads.c) implementations + // consistently providing local aliases, we can then eliminate aliasing + // elsewhere. + + // Create a default control tree for the operation, if needed. + //bli_l3_cntl_create_if( family, schema_a, schema_b, + // a, b, c, rntm_p, cntl, &cntl_use ); +#if 0 + cntl_use = bli_gemm_cntl_create( rntm_p, family, schema_a, schema_b ); + + // Create the root node of the thread's thrinfo_t structure. + bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread ); +#endif + + ( void )tid; + + func + ( + alpha, + a, + b, + beta, + c, + cntx, + rntm_p, + cntl_use, + thread + ); + +#if 0 + // Free the thread's local control tree. + //bli_l3_cntl_free( rntm_p, cntl_use, thread ); + bli_gemm_cntl_free( rntm_p, cntl_use, thread ); + + // Free the current thread's thrinfo_t structure. + bli_l3_thrinfo_free( rntm_p, thread ); +#endif + } + + // We shouldn't free the global communicator since it was already freed + // by the global communicator's chief thread in bli_l3_thrinfo_free() + // (called above). + + // Check the array_t back into the small block allocator. Similar to the + // check-out, this is done using a lock embedded within the sba to ensure + // mutual exclusion. + bli_sba_checkin_array( array ); + + return BLIS_SUCCESS; + +#endif +} + +#endif + diff --git a/frame/thread/bli_l3_sup_decor_pthreads.h b/frame/thread/bli_l3_sup_decor_pthreads.h new file mode 100644 index 000000000..1362b4035 --- /dev/null +++ b/frame/thread/bli_l3_sup_decor_pthreads.h @@ -0,0 +1,47 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_L3_SUP_DECOR_PTHREADS_H +#define BLIS_L3_SUP_DECOR_PTHREADS_H + +// Definitions specific to situations when POSIX multithreading is enabled. +#ifdef BLIS_ENABLE_PTHREADS + +// Thread entry point prototype. +void* bli_l3_sup_thread_entry( void* data_void ); + +#endif + +#endif +