From 41694675e4cb56e2e0323c7a7db48e0819606a31 Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Wed, 13 Apr 2016 15:51:08 -0500 Subject: [PATCH] pthreads bugfixes Getting pthreads to work on my Mac Implemented a pthread barrier when _POSIX_BARRIER isn't defined Now spawn n-1 threads instead of n threads so that master thread isn't just spinning the whole time Add -lpthread instead of -pthread to LDFLAGS (for clang) --- common.mk | 6 ++-- frame/base/bli_threading_pthreads.c | 51 ++++++++++++++++++++++++++--- frame/base/bli_threading_pthreads.h | 14 ++++---- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/common.mk b/common.mk index d33fee4a6..7c3d4606d 100644 --- a/common.mk +++ b/common.mk @@ -161,7 +161,7 @@ LDFLAGS += -fopenmp endif ifeq ($(THREADING_MODEL),pthreads) CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS -LDFLAGS += -pthread +LDFLAGS += -lpthread endif endif @@ -175,7 +175,7 @@ LDFLAGS += -openmp endif ifeq ($(THREADING_MODEL),pthreads) CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS -LDFLAGS += -pthread +LDFLAGS += -lpthread endif endif @@ -188,7 +188,7 @@ $(error OpenMP is not supported with Clang.) 
endif ifeq ($(THREADING_MODEL),pthreads) CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS -LDFLAGS += -pthread +LDFLAGS += -lpthread endif endif diff --git a/frame/base/bli_threading_pthreads.c b/frame/base/bli_threading_pthreads.c index d76756191..e22d2f7f3 100644 --- a/frame/base/bli_threading_pthreads.c +++ b/frame/base/bli_threading_pthreads.c @@ -36,11 +36,52 @@ #ifdef BLIS_ENABLE_PTHREADS +#ifndef _POSIX_BARRIERS + +int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count) +{ + if( barrier == NULL ) return 0; + barrier->n_threads = count; + barrier->sense = 0; + barrier->threads_arrived = 0; + pthread_mutex_init( &barrier->mutex, NULL ); + return 0; +} + +int pthread_barrier_destroy(pthread_barrier_t *barrier) +{ + if( barrier == NULL ) return 0; + pthread_mutex_destroy( &barrier->mutex ); + return 0; +} + +int pthread_barrier_wait(pthread_barrier_t *barrier) +{ + if(barrier == NULL || barrier->n_threads == 1) return 0; + bool_t my_sense = barrier->sense; + dim_t my_threads_arrived; + + pthread_mutex_lock( &barrier->mutex ); + my_threads_arrived = ++(barrier->threads_arrived); + pthread_mutex_unlock( &barrier->mutex ); + + if( my_threads_arrived == barrier->n_threads ) { + barrier->threads_arrived = 0; + barrier->sense = !barrier->sense; + } + else { + volatile bool_t* listener = &barrier->sense; + while( *listener == my_sense ) {} + } + return 0; +} +#endif + void* thread_decorator_helper( void* data_void ); typedef struct thread_data { - level3_int_t func; + l3_int_t func; obj_t* alpha; obj_t* a; obj_t* b; @@ -85,11 +126,9 @@ void bli_level3_thread_decorator ) { pthread_t* pthreads = (pthread_t*) bli_malloc(sizeof(pthread_t) * n_threads); - //Saying "datas" is kind of like saying "all y'all" thread_data_t* datas = (thread_data_t*) bli_malloc(sizeof(thread_data_t) * n_threads); - //pthread_attr_t* attr = (pthread_attr_t*) bli_malloc(sizeof(pthread_attr_t) * n_threads); - for( int i = 0; i < n_threads; i++ ) + 
for( int i = 1; i < n_threads; i++ ) { //Setup the thread data datas[i].func = func; @@ -105,7 +144,9 @@ void bli_level3_thread_decorator pthread_create( &pthreads[i], NULL, &thread_decorator_helper, &datas[i] ); } - for( int i = 0; i < n_threads; i++) + func( alpha, a, b, beta, c, cntx, cntl, thread[0] ); + + for( int i = 1; i < n_threads; i++) { pthread_join( pthreads[i], NULL ); } diff --git a/frame/base/bli_threading_pthreads.h b/frame/base/bli_threading_pthreads.h index 802875c44..91d12ae05 100644 --- a/frame/base/bli_threading_pthreads.h +++ b/frame/base/bli_threading_pthreads.h @@ -39,15 +39,17 @@ #include -#ifdef __APPLE__ +#ifndef _POSIX_BARRIERS typedef int pthread_barrierattr_t; -typedef struct + +struct pthread_barrier_s { pthread_mutex_t mutex; - pthread_cond_t cond; - int count; - int tripCount; -} pthread_barrier_t; + bool_t sense; + dim_t threads_arrived; + dim_t n_threads; +}; +typedef struct pthread_barrier_s pthread_barrier_t; int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count);