From ceee2f973ebe115beca55ca77f9e3ce36b14c28a Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 24 Jun 2019 17:47:40 -0500 Subject: [PATCH] Fixed thrinfo_t printing bug for small problems. Details: - Fixed a bug in bli_l3_thrinfo_print_gemm_paths() and bli_l3_thrinfo_print_trsm_paths(), defined in bli_l3_thrinfo.c, whereby subnodes of the thrinfo_t tree are "dereferenced" near the beginning of the functions, which may lead to segfaults in certain situations where the thread tree was not fully formed because the matrix problem was too small for the level of parallelism specified. (That is, too small because some threads were assigned no work due to the smallest units in the m and n dimensions being defined by the register blocksizes mr and nr.) The fix requires several nested levels of if statements, and this is one of those few instances where use of goto statements results in (mostly) prettier code, especially in the case of _gemm_paths(). And while it wasn't necessary, I ported this goto usage to the loop body that prints the thrinfo_t work_id and comm_id values for each thread. Thanks to Nicholai Tukanov for helping to find this bug. --- frame/3/bli_l3_thrinfo.c | 475 +++++++++++++++++++++++++-------------- 1 file changed, 304 insertions(+), 171 deletions(-) diff --git a/frame/3/bli_l3_thrinfo.c b/frame/3/bli_l3_thrinfo.c index 1d876d50f..4f073cb20 100644 --- a/frame/3/bli_l3_thrinfo.c +++ b/frame/3/bli_l3_thrinfo.c @@ -99,35 +99,84 @@ void bli_l3_thrinfo_print_gemm_paths thrinfo_t** threads ) { + // In order to query the number of threads, we query the only thread we + // know exists: thread 0.
dim_t n_threads = bli_thread_num_threads( threads[0] ); - dim_t gl_id; - thrinfo_t* jc_info = threads[0]; - thrinfo_t* pc_info = bli_thrinfo_sub_node( jc_info ); - thrinfo_t* pb_info = bli_thrinfo_sub_node( pc_info ); - thrinfo_t* ic_info = bli_thrinfo_sub_node( pb_info ); - thrinfo_t* pa_info = bli_thrinfo_sub_node( ic_info ); - thrinfo_t* jr_info = bli_thrinfo_sub_node( pa_info ); - thrinfo_t* ir_info = bli_thrinfo_sub_node( jr_info ); + // For the purposes of printing the "header" information that is common + // to the various instances of a thrinfo_t (ie: across all threads), we + // choose the last thread in case the problem is so small that there is + // only an "edge" case, which will always be assigned to the last thread + // (at least for higher levels of partitioning). + thrinfo_t* jc_info = threads[n_threads-1]; + thrinfo_t* pc_info = NULL; + thrinfo_t* pb_info = NULL; + thrinfo_t* ic_info = NULL; + thrinfo_t* pa_info = NULL; + thrinfo_t* jr_info = NULL; + thrinfo_t* ir_info = NULL; - dim_t jc_way = bli_thread_n_way( jc_info ); - dim_t pc_way = bli_thread_n_way( pc_info ); - dim_t pb_way = bli_thread_n_way( pb_info ); - dim_t ic_way = bli_thread_n_way( ic_info ); - dim_t pa_way = bli_thread_n_way( pa_info ); - dim_t jr_way = bli_thread_n_way( jr_info ); - dim_t ir_way = bli_thread_n_way( ir_info ); + // Initialize the n_ways and n_threads fields of each thrinfo_t "level" + // to -1. More than likely, these will all be overwritten with meaningful + // values, but in case some thrinfo_t trees are not fully built (see + // next comment), these will be the placeholder values.
+ dim_t jc_way = -1, pc_way = -1, pb_way = -1, ic_way = -1, + pa_way = -1, jr_way = -1, ir_way = -1; - dim_t jc_nt = bli_thread_num_threads( jc_info ); - dim_t pc_nt = bli_thread_num_threads( pc_info ); - dim_t pb_nt = bli_thread_num_threads( pb_info ); - dim_t ic_nt = bli_thread_num_threads( ic_info ); - dim_t pa_nt = bli_thread_num_threads( pa_info ); - dim_t jr_nt = bli_thread_num_threads( jr_info ); - dim_t ir_nt = bli_thread_num_threads( ir_info ); + dim_t jc_nt = -1, pc_nt = -1, pb_nt = -1, ic_nt = -1, + pa_nt = -1, jr_nt = -1, ir_nt = -1; + + // NOTE: We must check each thrinfo_t pointer for NULLness. Certain threads + // may not fully build their thrinfo_t structures--specifically when the + // dimension being parallelized is not large enough for each thread to have + // even one unit of work (where a unit is usually a single micropanel's + // width, MR or NR). + + if ( !jc_info ) goto print_header; + + jc_way = bli_thread_n_way( jc_info ); + jc_nt = bli_thread_num_threads( jc_info ); + pc_info = bli_thrinfo_sub_node( jc_info ); + + if ( !pc_info ) goto print_header; + + pc_way = bli_thread_n_way( pc_info ); + pc_nt = bli_thread_num_threads( pc_info ); + pb_info = bli_thrinfo_sub_node( pc_info ); + + if ( !pb_info ) goto print_header; + + pb_way = bli_thread_n_way( pb_info ); + pb_nt = bli_thread_num_threads( pb_info ); + ic_info = bli_thrinfo_sub_node( pb_info ); + + if ( !ic_info ) goto print_header; + + ic_way = bli_thread_n_way( ic_info ); + ic_nt = bli_thread_num_threads( ic_info ); + pa_info = bli_thrinfo_sub_node( ic_info ); + + if ( !pa_info ) goto print_header; + + pa_way = bli_thread_n_way( pa_info ); + pa_nt = bli_thread_num_threads( pa_info ); + jr_info = bli_thrinfo_sub_node( pa_info ); + + if ( !jr_info ) goto print_header; + + jr_way = bli_thread_n_way( jr_info ); + jr_nt = bli_thread_num_threads( jr_info ); + ir_info = bli_thrinfo_sub_node( jr_info ); + + if ( !ir_info ) goto print_header; + + ir_way = bli_thread_n_way( ir_info ); + ir_nt =
bli_thread_num_threads( ir_info ); + + print_header: printf( " jc kc pb ic pa jr ir\n" ); - printf( "xx_nt: %4lu %4lu %4lu %4lu %4lu %4lu %4lu\n", + printf( "xx_nt: %4ld %4ld %4ld %4ld %4ld %4ld %4ld\n", ( unsigned long )jc_nt, ( unsigned long )pc_nt, ( unsigned long )pb_nt, @@ -135,7 +184,7 @@ void bli_l3_thrinfo_print_gemm_paths ( unsigned long )pa_nt, ( unsigned long )jr_nt, ( unsigned long )ir_nt ); - printf( "xx_way: %4lu %4lu %4lu %4lu %4lu %4lu %4lu\n", + printf( "xx_way: %4ld %4ld %4ld %4ld %4ld %4ld %4ld\n", ( unsigned long )jc_way, ( unsigned long )pc_way, ( unsigned long )pb_way, @@ -145,116 +194,59 @@ void bli_l3_thrinfo_print_gemm_paths ( unsigned long )ir_way ); printf( "============================================\n" ); - dim_t jc_comm_id; - dim_t pc_comm_id; - dim_t pb_comm_id; - dim_t ic_comm_id; - dim_t pa_comm_id; - dim_t jr_comm_id; - dim_t ir_comm_id; - - dim_t jc_work_id; - dim_t pc_work_id; - dim_t pb_work_id; - dim_t ic_work_id; - dim_t pa_work_id; - dim_t jr_work_id; - dim_t ir_work_id; - - for ( gl_id = 0; gl_id < n_threads; ++gl_id ) + for ( dim_t gl_id = 0; gl_id < n_threads; ++gl_id ) { jc_info = threads[gl_id]; - // NOTE: We must check each thrinfo_t pointer for NULLness. Certain threads - // may not fully build their thrinfo_t structures--specifically when the - // dimension being parallelized is not large enough for each thread to have - // even one unit of work (where as unit is usually a single micropanel's - // width, MR or NR). 
- if ( !jc_info ) - { - jc_comm_id = pc_comm_id = pb_comm_id = ic_comm_id = pa_comm_id = jr_comm_id = ir_comm_id = -1; - jc_work_id = pc_work_id = pb_work_id = ic_work_id = pa_work_id = jr_work_id = ir_work_id = -1; - } - else - { - jc_comm_id = bli_thread_ocomm_id( jc_info ); - jc_work_id = bli_thread_work_id( jc_info ); - pc_info = bli_thrinfo_sub_node( jc_info ); + dim_t jc_comm_id = -1, pc_comm_id = -1, pb_comm_id = -1, ic_comm_id = -1, + pa_comm_id = -1, jr_comm_id = -1, ir_comm_id = -1; - if ( !pc_info ) - { - pc_comm_id = pb_comm_id = ic_comm_id = pa_comm_id = jr_comm_id = ir_comm_id = -1; - pc_work_id = pb_work_id = ic_work_id = pa_work_id = jr_work_id = ir_work_id = -1; - } - else - { - pc_comm_id = bli_thread_ocomm_id( pc_info ); - pc_work_id = bli_thread_work_id( pc_info ); - pb_info = bli_thrinfo_sub_node( pc_info ); + dim_t jc_work_id = -1, pc_work_id = -1, pb_work_id = -1, ic_work_id = -1, + pa_work_id = -1, jr_work_id = -1, ir_work_id = -1; - if ( !pb_info ) - { - pb_comm_id = ic_comm_id = pa_comm_id = jr_comm_id = ir_comm_id = -1; - pb_work_id = ic_work_id = pa_work_id = jr_work_id = ir_work_id = -1; - } - else - { - pb_comm_id = bli_thread_ocomm_id( pb_info ); - pb_work_id = bli_thread_work_id( pb_info ); - ic_info = bli_thrinfo_sub_node( pb_info ); + if ( !jc_info ) goto print_thrinfo; - if ( !ic_info ) - { - ic_comm_id = pa_comm_id = jr_comm_id = ir_comm_id = -1; - ic_work_id = pa_work_id = jr_work_id = ir_work_id = -1; - } - else - { - ic_comm_id = bli_thread_ocomm_id( ic_info ); - ic_work_id = bli_thread_work_id( ic_info ); - pa_info = bli_thrinfo_sub_node( ic_info ); + jc_comm_id = bli_thread_ocomm_id( jc_info ); + jc_work_id = bli_thread_work_id( jc_info ); + pc_info = bli_thrinfo_sub_node( jc_info ); - if ( !pa_info ) - { - pa_comm_id = jr_comm_id = ir_comm_id = -1; - pa_work_id = jr_work_id = ir_work_id = -1; - } - else - { - pa_comm_id = bli_thread_ocomm_id( pa_info ); - pa_work_id = bli_thread_work_id( pa_info ); - jr_info = 
bli_thrinfo_sub_node( pa_info ); + if ( !pc_info ) goto print_thrinfo; - if ( !jr_info ) - { - jr_comm_id = ir_comm_id = -1; - jr_work_id = ir_work_id = -1; - } - else - { - jr_comm_id = bli_thread_ocomm_id( jr_info ); - jr_work_id = bli_thread_work_id( jr_info ); - ir_info = bli_thrinfo_sub_node( jr_info ); + pc_comm_id = bli_thread_ocomm_id( pc_info ); + pc_work_id = bli_thread_work_id( pc_info ); + pb_info = bli_thrinfo_sub_node( pc_info ); - if ( !ir_info ) - { - ir_comm_id = -1; - ir_work_id = -1; - } - else - { - ir_comm_id = bli_thread_ocomm_id( ir_info ); - ir_work_id = bli_thread_work_id( ir_info ); - } - } - } - } - } - } - } + if ( !pb_info ) goto print_thrinfo; + + pb_comm_id = bli_thread_ocomm_id( pb_info ); + pb_work_id = bli_thread_work_id( pb_info ); + ic_info = bli_thrinfo_sub_node( pb_info ); + + if ( !ic_info ) goto print_thrinfo; + + ic_comm_id = bli_thread_ocomm_id( ic_info ); + ic_work_id = bli_thread_work_id( ic_info ); + pa_info = bli_thrinfo_sub_node( ic_info ); + + if ( !pa_info ) goto print_thrinfo; + + pa_comm_id = bli_thread_ocomm_id( pa_info ); + pa_work_id = bli_thread_work_id( pa_info ); + jr_info = bli_thrinfo_sub_node( pa_info ); + + if ( !jr_info ) goto print_thrinfo; + + jr_comm_id = bli_thread_ocomm_id( jr_info ); + jr_work_id = bli_thread_work_id( jr_info ); + ir_info = bli_thrinfo_sub_node( jr_info ); + + if ( !ir_info ) goto print_thrinfo; + + ir_comm_id = bli_thread_ocomm_id( ir_info ); + ir_work_id = bli_thread_work_id( ir_info ); + + print_thrinfo: - //printf( " gl jc pb kc pa ic jr \n" ); - //printf( " gl jc kc pb ic pa jr \n" ); printf( "comm ids: %4ld %4ld %4ld %4ld %4ld %4ld %4ld\n", ( long )jc_comm_id, ( long )pc_comm_id, @@ -285,44 +277,105 @@ void bli_l3_thrinfo_print_trsm_paths thrinfo_t** threads ) { + // In order to query the number of threads, we query the only thread we + // know exists: thread 0. 
dim_t n_threads = bli_thread_num_threads( threads[0] ); - dim_t gl_id; - thrinfo_t* jc_info = threads[0]; - thrinfo_t* pc_info = bli_thrinfo_sub_node( jc_info ); - thrinfo_t* pb_info = bli_thrinfo_sub_node( pc_info ); - thrinfo_t* ic_info = bli_thrinfo_sub_node( pb_info ); + // For the purposes of printing the "header" information that is common + // to the various instances of a thrinfo_t (ie: across all threads), we + // choose the last thread in case the problem is so small that there is + // only an "edge" case, which will always be assigned to the last thread + // (at least for higher levels of partitioning). + thrinfo_t* jc_info = threads[n_threads-1]; + thrinfo_t* pc_info = NULL; + thrinfo_t* pb_info = NULL; + thrinfo_t* ic_info = NULL; + thrinfo_t* pa_info = NULL; thrinfo_t* pa_info0 = NULL; + thrinfo_t* jr_info = NULL; thrinfo_t* jr_info0 = NULL; + thrinfo_t* ir_info = NULL; thrinfo_t* ir_info0 = NULL; - thrinfo_t* pa_info = bli_thrinfo_sub_node( ic_info ); - thrinfo_t* jr_info = bli_thrinfo_sub_node( pa_info ); - thrinfo_t* ir_info = bli_thrinfo_sub_node( jr_info ); - thrinfo_t* pa_info0 = bli_thrinfo_sub_prenode( ic_info ); - thrinfo_t* jr_info0 = ( pa_info0 ? bli_thrinfo_sub_node( pa_info0 ) : NULL ); - thrinfo_t* ir_info0 = ( jr_info0 ? bli_thrinfo_sub_node( jr_info0 ) : NULL ); + // Initialize the n_ways and n_threads fields of each thrinfo_t "level" + // to -1. More than likely, these will all be overwritten with meaningful + // values, but in case some thrinfo_t trees are not fully built (see + // next comment), these will be the placeholder values.
+ dim_t jc_way = -1, pc_way = -1, pb_way = -1, ic_way = -1, + pa_way = -1, jr_way = -1, ir_way = -1, + pa_way0 = -1, jr_way0 = -1, ir_way0 = -1; - dim_t jc_way = bli_thread_n_way( jc_info ); - dim_t pc_way = bli_thread_n_way( pc_info ); - dim_t pb_way = bli_thread_n_way( pb_info ); - dim_t ic_way = bli_thread_n_way( ic_info ); + dim_t jc_nt = -1, pc_nt = -1, pb_nt = -1, ic_nt = -1, + pa_nt = -1, jr_nt = -1, ir_nt = -1, + pa_nt0 = -1, jr_nt0 = -1, ir_nt0 = -1; - dim_t pa_way = bli_thread_n_way( pa_info ); - dim_t jr_way = bli_thread_n_way( jr_info ); - dim_t ir_way = bli_thread_n_way( ir_info ); - dim_t pa_way0 = ( pa_info0 ? bli_thread_n_way( pa_info0 ) : -1 ); - dim_t jr_way0 = ( jr_info0 ? bli_thread_n_way( jr_info0 ) : -1 ); - dim_t ir_way0 = ( ir_info0 ? bli_thread_n_way( ir_info0 ) : -1 ); + // NOTE: We must check each thrinfo_t pointer for NULLness. Certain threads + // may not fully build their thrinfo_t structures--specifically when the + // dimension being parallelized is not large enough for each thread to have + // even one unit of work (where a unit is usually a single micropanel's + // width, MR or NR). - dim_t jc_nt = bli_thread_num_threads( jc_info ); - dim_t pc_nt = bli_thread_num_threads( pc_info ); - dim_t pb_nt = bli_thread_num_threads( pb_info ); - dim_t ic_nt = bli_thread_num_threads( ic_info ); + if ( !jc_info ) goto print_header; - dim_t pa_nt = bli_thread_num_threads( pa_info ); - dim_t jr_nt = bli_thread_num_threads( jr_info ); - dim_t ir_nt = bli_thread_num_threads( ir_info ); - dim_t pa_nt0 = ( pa_info0 ? bli_thread_num_threads( pa_info0 ) : -1 ); - dim_t jr_nt0 = ( jr_info0 ? bli_thread_num_threads( jr_info0 ) : -1 ); - dim_t ir_nt0 = ( ir_info0 ?
bli_thread_num_threads( ir_info0 ) : -1 ); + jc_way = bli_thread_n_way( jc_info ); + jc_nt = bli_thread_num_threads( jc_info ); + pc_info = bli_thrinfo_sub_node( jc_info ); + + if ( !pc_info ) goto print_header; + + pc_way = bli_thread_n_way( pc_info ); + pc_nt = bli_thread_num_threads( pc_info ); + pb_info = bli_thrinfo_sub_node( pc_info ); + + if ( !pb_info ) goto print_header; + + pb_way = bli_thread_n_way( pb_info ); + pb_nt = bli_thread_num_threads( pb_info ); + ic_info = bli_thrinfo_sub_node( pb_info ); + + if ( !ic_info ) goto print_header; + + ic_way = bli_thread_n_way( ic_info ); + ic_nt = bli_thread_num_threads( ic_info ); + pa_info = bli_thrinfo_sub_node( ic_info ); + pa_info0 = bli_thrinfo_sub_prenode( ic_info ); + + // check_header_prenode: + + if ( !pa_info0 ) goto check_header_node; + + pa_way0 = bli_thread_n_way( pa_info0 ); + pa_nt0 = bli_thread_num_threads( pa_info0 ); + jr_info0 = bli_thrinfo_sub_node( pa_info0 ); + + if ( !jr_info0 ) goto check_header_node; + + jr_way0 = bli_thread_n_way( jr_info0 ); + jr_nt0 = bli_thread_num_threads( jr_info0 ); + ir_info0 = bli_thrinfo_sub_node( jr_info0 ); + + if ( !ir_info0 ) goto check_header_node; + + ir_way0 = bli_thread_n_way( ir_info0 ); + ir_nt0 = bli_thread_num_threads( ir_info0 ); + + check_header_node: + + if ( !pa_info ) goto print_header; + + pa_way = bli_thread_n_way( pa_info ); + pa_nt = bli_thread_num_threads( pa_info ); + jr_info = bli_thrinfo_sub_node( pa_info ); + + if ( !jr_info ) goto print_header; + + jr_way = bli_thread_n_way( jr_info ); + jr_nt = bli_thread_num_threads( jr_info ); + ir_info = bli_thrinfo_sub_node( jr_info ); + + if ( !ir_info ) goto print_header; + + ir_way = bli_thread_n_way( ir_info ); + ir_nt = bli_thread_num_threads( ir_info ); + + print_header: printf( " jc kc pb ic pa jr ir\n" ); printf( "xx_nt: %4ld %4ld %4ld %4ld %2ld|%2ld %2ld|%2ld %2ld|%2ld\n", @@ -343,26 +396,105 @@ void bli_l3_thrinfo_print_trsm_paths ( long )ir_way0, ( long )ir_way ); printf( 
"==================================================\n" ); - dim_t jc_comm_id; - dim_t pc_comm_id; - dim_t pb_comm_id; - dim_t ic_comm_id; - dim_t pa_comm_id0, pa_comm_id; - dim_t jr_comm_id0, jr_comm_id; - dim_t ir_comm_id0, ir_comm_id; - dim_t jc_work_id; - dim_t pc_work_id; - dim_t pb_work_id; - dim_t ic_work_id; - dim_t pa_work_id0, pa_work_id; - dim_t jr_work_id0, jr_work_id; - dim_t ir_work_id0, ir_work_id; - - for ( gl_id = 0; gl_id < n_threads; ++gl_id ) + for ( dim_t gl_id = 0; gl_id < n_threads; ++gl_id ) { jc_info = threads[gl_id]; +#if 1 + // NOTE: This cpp branch contains code that is safe to execute + // for small problems that are parallelized enough that one or + // more threads gets no work. + + dim_t jc_comm_id = -1, pc_comm_id = -1, pb_comm_id = -1, ic_comm_id = -1, + pa_comm_id = -1, jr_comm_id = -1, ir_comm_id = -1, + pa_comm_id0 = -1, jr_comm_id0 = -1, ir_comm_id0 = -1; + + dim_t jc_work_id = -1, pc_work_id = -1, pb_work_id = -1, ic_work_id = -1, + pa_work_id = -1, jr_work_id = -1, ir_work_id = -1, + pa_work_id0 = -1, jr_work_id0 = -1, ir_work_id0 = -1; + + if ( !jc_info ) goto print_thrinfo; + + jc_comm_id = bli_thread_ocomm_id( jc_info ); + jc_work_id = bli_thread_work_id( jc_info ); + pc_info = bli_thrinfo_sub_node( jc_info ); + + if ( !pc_info ) goto print_thrinfo; + + pc_comm_id = bli_thread_ocomm_id( pc_info ); + pc_work_id = bli_thread_work_id( pc_info ); + pb_info = bli_thrinfo_sub_node( pc_info ); + + if ( !pb_info ) goto print_thrinfo; + + pb_comm_id = bli_thread_ocomm_id( pb_info ); + pb_work_id = bli_thread_work_id( pb_info ); + ic_info = bli_thrinfo_sub_node( pb_info ); + + if ( !ic_info ) goto print_thrinfo; + + ic_comm_id = bli_thread_ocomm_id( ic_info ); + ic_work_id = bli_thread_work_id( ic_info ); + pa_info = bli_thrinfo_sub_node( ic_info ); + pa_info0 = bli_thrinfo_sub_prenode( ic_info ); + + // check_thrinfo_prenode: + + if ( !pa_info0 ) goto check_thrinfo_node; + + pa_comm_id0 = bli_thread_ocomm_id( pa_info0 ); + 
pa_work_id0 = bli_thread_work_id( pa_info0 ); + jr_info0 = bli_thrinfo_sub_node( pa_info0 ); + + if ( !jr_info0 ) goto check_thrinfo_node; + + jr_comm_id0 = bli_thread_ocomm_id( jr_info0 ); + jr_work_id0 = bli_thread_work_id( jr_info0 ); + ir_info0 = bli_thrinfo_sub_node( jr_info0 ); + + if ( !ir_info0 ) goto check_thrinfo_node; + + ir_comm_id0 = bli_thread_ocomm_id( ir_info0 ); + ir_work_id0 = bli_thread_work_id( ir_info0 ); + + check_thrinfo_node: + + if ( !pa_info ) goto print_thrinfo; + + pa_comm_id = bli_thread_ocomm_id( pa_info ); + pa_work_id = bli_thread_work_id( pa_info ); + jr_info = bli_thrinfo_sub_node( pa_info ); + + if ( !jr_info ) goto print_thrinfo; + + jr_comm_id = bli_thread_ocomm_id( jr_info ); + jr_work_id = bli_thread_work_id( jr_info ); + ir_info = bli_thrinfo_sub_node( jr_info ); + + if ( !ir_info ) goto print_thrinfo; + + ir_comm_id = bli_thread_ocomm_id( ir_info ); + ir_work_id = bli_thread_work_id( ir_info ); + + print_thrinfo: +#else + dim_t jc_comm_id; + dim_t pc_comm_id; + dim_t pb_comm_id; + dim_t ic_comm_id; + dim_t pa_comm_id0, pa_comm_id; + dim_t jr_comm_id0, jr_comm_id; + dim_t ir_comm_id0, ir_comm_id; + + dim_t jc_work_id; + dim_t pc_work_id; + dim_t pb_work_id; + dim_t ic_work_id; + dim_t pa_work_id0, pa_work_id; + dim_t jr_work_id0, jr_work_id; + dim_t ir_work_id0, ir_work_id; + // NOTE: We must check each thrinfo_t pointer for NULLness. Certain threads // may not fully build their thrinfo_t structures--specifically when the // dimension being parallelized is not large enough for each thread to have @@ -488,6 +620,7 @@ void bli_l3_thrinfo_print_trsm_paths } } } +#endif printf( "comm ids: %4ld %4ld %4ld %4ld %2ld|%2ld %2ld|%2ld %2ld|%2ld\n", ( long )jc_comm_id,