From 574de9e29e2dffd56f566a2a3d124f4ea4e1e522 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 17 Feb 2020 14:57:25 -0600 Subject: [PATCH] Fixed bug(s) in mt sup when single-threaded. Details: - Fixed a syntax bug in bli_l3_sup_decor_single.c as a result of changing the function interface for the thread entry point function (of type l3supint_t). - Unfortunately, fixing the interface was not enough, as it caused a memory leak in the sba at bli_finalize() time. It turns out that, due to the new multithreading-capable variant code using thrinfo_t objects--specifically, their calling of bli_thrinfo_grow()--we have to pass in a real thrinfo_t object rather than the global objects &BLIS_PACKM_SINGLE_THREADED or &BLIS_GEMM_SINGLE_THREADED. Thus, I inserted the appropriate logic from the OpenMP and pthreads versions so that single-threaded execution would work as intended with the newly upgraded variants. Change-Id: I2bfff849abf3fa30c73e0c5876128400854bbcb5 --- frame/thread/bli_l3_sup_decor_single.c | 37 ++++---------------------- 1 file changed, 5 insertions(+), 32 deletions(-) diff --git a/frame/thread/bli_l3_sup_decor_single.c b/frame/thread/bli_l3_sup_decor_single.c index c4eceae16..5ea701cc4 100644 --- a/frame/thread/bli_l3_sup_decor_single.c +++ b/frame/thread/bli_l3_sup_decor_single.c @@ -91,17 +91,14 @@ err_t bli_l3_sup_thread_decorator // resize the array_t, if necessary. array_t* restrict array = bli_sba_checkout_array( n_threads ); - // Access the pool_t* for thread 0 and embed it into the rntm. We do - // this up-front only so that we can create the global comm below. + // Access the pool_t* for thread 0 and embed it into the rntm. bli_sba_rntm_set_pool( 0, array, rntm ); // Set the packing block allocator field of the rntm. bli_membrk_rntm_set_membrk( rntm ); -#if 0 // Allcoate a global communicator for the root thrinfo_t structures. 
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads ); -#endif { @@ -109,10 +106,7 @@ err_t bli_l3_sup_thread_decorator // it was already copied in one of the high-level oapi functions. rntm_t* restrict rntm_p = rntm; - cntl_t* cntl_use = NULL; - //thrinfo_t* thread = NULL; - thrinfo_t* thread = &BLIS_PACKM_SINGLE_THREADED; - + // There is only one thread id (for the thief thread). const dim_t tid = 0; // Use the thread id to access the appropriate pool_t* within the @@ -123,24 +117,10 @@ err_t bli_l3_sup_thread_decorator // this is redundant since it's already been done above. //bli_sba_rntm_set_pool( tid, array, rntm_p ); - // NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't - // need to alias objects for A, B, and C since they were already aliased - // in bli_*_front(). However, we may add aliasing here in the future so - // that, with all three (_single.c, _openmp.c, _pthreads.c) implementations - // consistently providing local aliases, we can then eliminate aliasing - // elsewhere. - - // Create a default control tree for the operation, if needed. - //bli_l3_cntl_create_if( family, schema_a, schema_b, - // a, b, c, rntm_p, cntl, &cntl_use ); -#if 0 - cntl_use = bli_gemm_cntl_create( rntm_p, family, schema_a, schema_b ); + thrinfo_t* thread = NULL; // Create the root node of the thread's thrinfo_t structure. - bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread ); -#endif - - ( void )tid; + bli_l3_sup_thrinfo_create_root( tid, gl_comm, rntm_p, &thread ); func ( @@ -151,18 +131,11 @@ err_t bli_l3_sup_thread_decorator c, cntx, rntm_p, - cntl_use, thread ); -#if 0 - // Free the thread's local control tree. - //bli_l3_cntl_free( rntm_p, cntl_use, thread ); - bli_gemm_cntl_free( rntm_p, cntl_use, thread ); - // Free the current thread's thrinfo_t structure. 
- bli_l3_thrinfo_free( rntm_p, thread ); -#endif + bli_l3_sup_thrinfo_free( rntm_p, thread ); } // We shouldn't free the global communicator since it was already freed