mirror of
https://github.com/amd/blis.git
synced 2026-04-20 15:48:50 +00:00
Fix DTL dynamic thread logging in BLAS operations (#230)
- Remove redundant AOCL_DTL_LOG_NUM_THREADS calls from early return paths - Update thread count logging to use AOCL_get_requested_threads_count() for early exits - Clean up duplicate DTL logging in gemv_unf_var1 and gemv_unf_var2 implementations - Remove thread count logging from bli_dgemv_n_zen4_int kernel variants - Simplify aocldtl_blis.c AOCL_DTL_log_gemv_sizes by removing redundant conditional - Standardize DTL trace exit patterns across axpy, scal, and gemv operations - Remove commented-out DTL logging code in zen4 gemv kernel This patch ensures thread count is logged only once per operation and uses the correct API (AOCL_get_requested_threads_count) for early exit scenarios where the actual execution thread count may differ from requested threads.
This commit is contained in:
committed by
KR, Chandrashekara
parent
7341b12c46
commit
b729473839
@@ -717,18 +717,10 @@ void AOCL_DTL_log_gemv_sizes(int8 loglevel,
|
||||
DTL_get_complex_parts(dt_type, beta, &beta_real, &beta_imag);
|
||||
|
||||
// {S, D,C, Z} { transa, m, n, alpha, lda, incx, beta, incy}
|
||||
if (dt_type == 'd' || dt_type == 'D' )
|
||||
{
|
||||
sprintf(buffer, "%c %c %ld %ld %lf %lf %ld %ld %lf %lf %ld", tolower(dt_type),
|
||||
transa, (dim_t)m, (dim_t)n, alpha_real, alpha_imag,
|
||||
(dim_t)lda, (dim_t)incx, beta_real, beta_imag, (dim_t)incy);
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf(buffer, "%c %c %ld %ld %lf %lf %ld %ld %lf %lf %ld\n", tolower(dt_type),
|
||||
transa, (dim_t)m, (dim_t)n, alpha_real, alpha_imag,
|
||||
(dim_t)lda, (dim_t)incx, beta_real, beta_imag, (dim_t)incy);
|
||||
}
|
||||
sprintf(buffer, "%c %c %ld %ld %lf %lf %ld %ld %lf %lf %ld\n", tolower(dt_type),
|
||||
transa, (dim_t)m, (dim_t)n, alpha_real, alpha_imag,
|
||||
(dim_t)lda, (dim_t)incx, beta_real, beta_imag, (dim_t)incy);
|
||||
|
||||
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
}
|
||||
|
||||
@@ -247,7 +247,6 @@ void bli_dgemv_unf_var1
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
|
||||
return;
|
||||
}
|
||||
@@ -385,7 +384,6 @@ void bli_dgemv_unf_var1
|
||||
);
|
||||
}
|
||||
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3);
|
||||
return;
|
||||
}
|
||||
@@ -404,7 +402,6 @@ void bli_dgemv_unf_var1
|
||||
cntx
|
||||
);
|
||||
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
|
||||
return;
|
||||
}
|
||||
@@ -495,8 +492,6 @@ void bli_dgemv_unf_var1
|
||||
cntx
|
||||
);
|
||||
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
|
||||
#if defined(BLIS_ENABLE_OPENMP)
|
||||
}
|
||||
else
|
||||
@@ -574,7 +569,6 @@ void bli_dgemv_unf_var1
|
||||
cntx
|
||||
);
|
||||
}
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, nt);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -594,7 +588,7 @@ void bli_dgemv_unf_var1
|
||||
y, incy,
|
||||
NULL
|
||||
);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
|
||||
}
|
||||
|
||||
// If x was packed into x_temp, free the memory.
|
||||
|
||||
@@ -342,8 +342,8 @@ void bli_dgemv_unf_var2 (
|
||||
y, incy,
|
||||
cntx
|
||||
);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3);
|
||||
return;
|
||||
|
||||
default:
|
||||
@@ -477,7 +477,6 @@ void bli_dgemv_unf_var2 (
|
||||
|
||||
if( bli_deq0( *alpha ) )
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3)
|
||||
return;
|
||||
}
|
||||
@@ -522,7 +521,7 @@ void bli_dgemv_unf_var2 (
|
||||
// Return the buffer to pool
|
||||
bli_pba_release(&rntm , &mem_bufY);
|
||||
}
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3);
|
||||
}
|
||||
|
||||
|
||||
@@ -146,7 +146,7 @@ void saxpy_blis_impl
|
||||
*/
|
||||
if ((*n) <= 0 || PASTEMAC(s, eq0)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
return;
|
||||
}
|
||||
@@ -280,7 +280,7 @@ void daxpy_blis_impl
|
||||
*/
|
||||
if ((*n) <= 0 || PASTEMAC(d, eq0)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
return;
|
||||
}
|
||||
@@ -549,7 +549,7 @@ void caxpy_blis_impl
|
||||
*/
|
||||
if ((*n) <= 0 || PASTEMAC(c, eq0)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
return;
|
||||
}
|
||||
@@ -670,7 +670,7 @@ void zaxpy_blis_impl
|
||||
*/
|
||||
if ((*n) <= 0 || PASTEMAC(z, eq0)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -216,7 +216,6 @@ void dgemv_blis_impl
|
||||
if ( *m == 0 || *n == 0 || \
|
||||
( PASTEMAC(d,eq0)( *alpha ) && PASTEMAC(d,eq1)( *beta ) ) )
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
@@ -258,7 +257,6 @@ void dgemv_blis_impl
|
||||
this quirky behavior; it will scale y by beta, as one would expect. */
|
||||
if ( m_y > 0 && n_x == 0 )
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
@@ -312,7 +310,6 @@ void dgemv_blis_impl
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
@@ -340,7 +337,7 @@ void dgemv_blis_impl
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
|
||||
@@ -80,10 +80,10 @@ void PASTEF772S(chx,cha,blasname) \
|
||||
\
|
||||
if ((n0 <= 0) || (alpha == NULL) || (incx0 <= 0) || PASTEMAC(chau, eq1)(*alpha)) \
|
||||
{ \
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count()); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
return ; \
|
||||
} \
|
||||
\
|
||||
@@ -106,7 +106,7 @@ void PASTEF772S(chx,cha,blasname) \
|
||||
NULL \
|
||||
); \
|
||||
\
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1); \
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
@@ -147,7 +147,7 @@ void sscal_blis_impl
|
||||
*/
|
||||
if ((n0 <= 0) || (alpha == NULL) || (incx0 <= 0) || PASTEMAC(s, eq1)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
@@ -238,7 +238,7 @@ void dscal_blis_impl
|
||||
*/
|
||||
if ((n0 <= 0) || (alpha == NULL) || (incx0 <= 0) || PASTEMAC(d, eq1)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
@@ -437,7 +437,7 @@ void zdscal_blis_impl
|
||||
*/
|
||||
if ((n0 <= 0) || (alpha == NULL) || (incx0 <= 0) || PASTEMAC(d, eq1)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
@@ -616,7 +616,7 @@ void cscal_blis_impl
|
||||
*/
|
||||
if ((n0 <= 0) || (alpha == NULL) || (incx0 <= 0) || PASTEMAC(c, eq1)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
@@ -711,7 +711,7 @@ void zscal_blis_impl
|
||||
*/
|
||||
if ((n0 <= 0) || (alpha == NULL) || (incx0 <= 0) || PASTEMAC(z, eq1)(*alpha))
|
||||
{
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, AOCL_get_requested_threads_count());
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
// Call to bli_finalize_auto() is not needed here
|
||||
|
||||
@@ -65,7 +65,6 @@
|
||||
\
|
||||
PASTE_XERBLA( func_str, &info, (ftnlen)6 ); \
|
||||
\
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
\
|
||||
/* Finalize BLIS. */ \
|
||||
|
||||
@@ -341,10 +341,10 @@ BLIS_EXPORT_BLIS void bli_daxpyv_zen4_int
|
||||
_mm_storel_pd(y0, y_vec);
|
||||
}
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
__m512d xv[8], yv[8], alphav;
|
||||
|
||||
// Broadcast the alpha scalar to all elements of a vector register.
|
||||
|
||||
@@ -1179,7 +1179,6 @@ void bli_dgemv_m_zen4_int_40x8_mt_Mdiv
|
||||
y, incy,
|
||||
NULL
|
||||
);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
return;
|
||||
}
|
||||
@@ -1225,7 +1224,7 @@ void bli_dgemv_m_zen4_int_40x8_mt_Mdiv
|
||||
cntx
|
||||
);
|
||||
}
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, nt);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4);
|
||||
} // end of function
|
||||
|
||||
/*
|
||||
@@ -1292,8 +1291,8 @@ void bli_dgemv_m_zen4_int_40x8_mt_Ndiv
|
||||
y, incy,
|
||||
NULL
|
||||
);
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1345,7 +1344,6 @@ void bli_dgemv_m_zen4_int_40x8_mt_Ndiv
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
return;
|
||||
}
|
||||
@@ -1434,7 +1432,7 @@ void bli_dgemv_m_zen4_int_40x8_mt_Ndiv
|
||||
bli_pba_release(&rntm, &local_mem_buf);
|
||||
}
|
||||
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, nt);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1486,7 +1484,7 @@ void bli_dgemv_n_zen4_int (
|
||||
{
|
||||
ker_ft = bli_dgemv_n_zen4_int_32x8_st;
|
||||
}
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -1510,7 +1508,6 @@ void bli_dgemv_n_zen4_int (
|
||||
{
|
||||
ker_ft = bli_dgemv_n_zen4_int_32x8_st;
|
||||
}
|
||||
AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1519,10 +1516,6 @@ void bli_dgemv_n_zen4_int (
|
||||
if ( incy != 1 || transa != BLIS_NO_TRANSPOSE)
|
||||
{
|
||||
ker_ft = bli_dgemv_n_zen4_int_32x8_st;
|
||||
// AOCL_DTL_LOG_NUM_THREADS(AOCL_DTL_LEVEL_TRACE_1, 1);
|
||||
// I am commenting out the above line because
|
||||
// it ends up calling twice sometimes.
|
||||
// Need to fix it later !!
|
||||
}
|
||||
// Call the function pointer
|
||||
ker_ft
|
||||
|
||||
Reference in New Issue
Block a user