DTL logging fixes and improvements (5)

More improvements to DTL coverage and coding:
- Expand logging and tracing coverage to IxAMIN and GEMM_BATCH APIs
- Expand logging and performance states to GEMM3M APIs
- Expand logging coverage to matrix copy, transpose and add APIs
- Misc tidying of code

AMD-Internal: [CPUPL-7010]
This commit is contained in:
Smyth, Edward
2025-08-20 11:37:03 +01:00
committed by GitHub
parent 509aa07785
commit 0b9e846fee
11 changed files with 469 additions and 23 deletions

View File

@@ -478,6 +478,95 @@ void AOCL_DTL_log_trsm_stats(int8 loglevel,
// Level-3 Extension Logging
/*
 * Logs the input parameters of a GEMM3M call and starts the DTL
 * performance timer.
 *
 * alpha and beta are split into real/imaginary parts by
 * DTL_get_complex_parts() according to dt_type, then everything is
 * formatted into one space-separated record in this order:
 *   dt_type (lower-cased) transa transb m n k alpha_real alpha_imag
 *   lda ldb beta_real beta_imag ldc
 *
 * The record carries no trailing newline.
 *
 * filename is accepted but not used; function_name is passed twice to
 * DTL_Trace(), matching the other loggers in this file.
 */
void AOCL_DTL_log_gemm3m_sizes(int8 loglevel,
                               char dt_type,
                               const f77_char transa,
                               const f77_char transb,
                               const f77_int m,
                               const f77_int n,
                               const f77_int k,
                               const void *alpha,
                               const f77_int lda,
                               const f77_int ldb,
                               const void *beta,
                               const f77_int ldc,
                               const char *filename,
                               const char *function_name,
                               int line)
{
    char buffer[256];
    double alpha_real = 0.0;
    double alpha_imag = 0.0;
    double beta_real = 0.0;
    double beta_imag = 0.0;

    DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
    DTL_get_complex_parts(dt_type, beta, &beta_real, &beta_imag);

    // Ordering as per cblas/blas interfaces
    // {S, D, C, Z} transa, transb, m, n, k, alpha_real, alpha_imag,
    //              lda, ldb, beta_real, beta_imag, ldc
    //
    // snprintf (not sprintf): a single "%lf" of a large-magnitude scalar
    // can expand to 300+ characters, which would overrun buffer. Over-long
    // records are truncated instead.
    snprintf(buffer, sizeof(buffer),
             "%c %c %c %ld %ld %ld %lf %lf %ld %ld %lf %lf %ld",
             tolower((unsigned char)dt_type),
             transa, transb,
             (dim_t)m, (dim_t)n, (dim_t)k,
             alpha_real, alpha_imag,
             (inc_t)lda, (inc_t)ldb,
             beta_real, beta_imag,
             (inc_t)ldc);

    AOCL_DTL_START_PERF_TIMER();
    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
}
/*
 * Logs the thread count, elapsed time and (when measurable) the achieved
 * GFLOPS of a GEMM3M call as a raw trace record.
 *
 * The elapsed time comes from AOCL_DTL_get_time_spent() and is in
 * microseconds. The operation count is 2*m*n*k, multiplied by 4 for the
 * complex types ('c'/'C'/'z'/'Z').
 */
void AOCL_DTL_log_gemm3m_stats(int8 loglevel,
                               char dt_type,
                               const f77_int m,
                               const f77_int n,
                               const f77_int k)
{
    char buffer[256];

    // Execution time is in micro seconds.
    Double execution_time = AOCL_DTL_get_time_spent();

    double flops = 2.0 * m * n * k;
    if (dt_type == 'c' || dt_type == 'C' || dt_type == 'z' || dt_type == 'Z')
    {
        flops = 4.0 * flops;
    }

    if (execution_time != 0.0)
    {
        // flops / microseconds = MFLOPS; the extra 1e3 converts to GFLOPS.
        snprintf(buffer, sizeof(buffer), " nt=%ld %.3f ms %0.3f GFLOPS",
                 AOCL_get_requested_threads_count(),
                 execution_time/1000.0,
                 flops/(execution_time * 1e3));
    }
    else
    {
        // A zero elapsed time would divide by zero, so GFLOPS is omitted.
        snprintf(buffer, sizeof(buffer), " nt=%ld %.3f ms",
                 AOCL_get_requested_threads_count(),
                 execution_time/1000.0);
    }

    DTL_Trace(loglevel, TRACE_TYPE_RAW, NULL, NULL, 0, buffer);
}
/*
 * Logs the inputs of a GEMM_BATCH call: the lower-cased data-type letter
 * followed by group_count, terminated by a newline.
 *
 * filename is accepted but not used; function_name is passed twice to
 * DTL_Trace().
 */
void AOCL_DTL_log_gemm_batch_sizes(int8 loglevel,
                                   char dt_type,
                                   const f77_int group_count,
                                   const char *filename,
                                   const char *function_name,
                                   int line)
{
    char record[256];
    const int type_tag = tolower(dt_type);
    const dim_t groups = (dim_t)group_count;

    /* Layout follows the cblas/blas argument order:
       {s, d, c, z} group_count */
    sprintf(record, "%c %ld\n", type_tag, groups);

    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, record);
}
void AOCL_DTL_log_gemm_get_size_sizes(int8 loglevel,
char dt_type,
const f77_char identifier,
@@ -1288,6 +1377,21 @@ void AOCL_DTL_log_rotmg_sizes(int8 loglevel,
// Level-1 Logging
/*
 * Logs the inputs of an IxAMIN call: the lower-cased data-type letter,
 * the vector length n and the stride incx, terminated by a newline.
 *
 * filename is accepted but not used; function_name is passed twice to
 * DTL_Trace().
 */
void AOCL_DTL_log_amin_sizes(int8 loglevel,
                             char dt_type,
                             const f77_int n,
                             const f77_int incx,
                             const char *filename,
                             const char *function_name,
                             int line)
{
    char record[256];
    const int type_tag = tolower(dt_type);
    const dim_t length = (dim_t)n;
    const dim_t stride = (dim_t)incx;

    /* {s, d, c, z} n incx */
    sprintf(record, "%c %ld %ld\n", type_tag, length, stride);

    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, record);
}
void AOCL_DTL_log_amax_sizes(int8 loglevel,
char dt_type,
const f77_int n,
@@ -1486,4 +1590,115 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
}
// Matrix Copy and Transpose Logging
/*
 * Logs the input parameters of an ?omatadd call and starts the DTL
 * performance timer.
 *
 * alpha and beta are split into real/imaginary parts by
 * DTL_get_complex_parts() according to dt_type. Record layout:
 *   dt_type (lower-cased) transa transb m n alpha_real alpha_imag lda
 *   beta_real beta_imag ldb ldc
 *
 * The record carries no trailing newline.
 * NOTE(review): the perf timer is started here, but no matching
 * *_stats logger for matadd is visible in this part of the file -
 * confirm one exists or that the unterminated line is intended.
 *
 * filename is accepted but not used; function_name is passed twice to
 * DTL_Trace().
 */
void AOCL_DTL_log_matadd_sizes(int8 loglevel,
                               char dt_type,
                               const f77_char transa,
                               const f77_char transb,
                               const f77_int m,
                               const f77_int n,
                               const void* alpha,
                               const f77_int lda,
                               const void* beta,
                               const f77_int ldb,
                               const f77_int ldc,
                               const char* filename,
                               const char* function_name,
                               int line)
{
    char buffer[256];
    double alpha_real = 0.0;
    double alpha_imag = 0.0;
    double beta_real = 0.0;
    double beta_imag = 0.0;

    DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
    DTL_get_complex_parts(dt_type, beta, &beta_real, &beta_imag);

    // Ordering as per cblas/blas interfaces
    // {S, D, C, Z} transa, transb, m, n, alpha_real, alpha_imag,
    //              lda, beta_real, beta_imag, ldb, ldc
    //
    // snprintf (not sprintf): "%lf" of a large-magnitude scalar can expand
    // to 300+ characters and would overrun buffer; truncate instead.
    snprintf(buffer, sizeof(buffer),
             "%c %c %c %ld %ld %lf %lf %ld %lf %lf %ld %ld",
             tolower((unsigned char)dt_type),
             transa, transb,
             (dim_t)m, (dim_t)n,
             alpha_real, alpha_imag, (inc_t)lda,
             beta_real, beta_imag, (inc_t)ldb,
             (inc_t)ldc);

    AOCL_DTL_START_PERF_TIMER();
    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
}
/*
 * Logs the input parameters of a matrix copy/transpose call
 * (?omatcopy / ?imatcopy) and starts the DTL performance timer.
 *
 * alpha is split into real/imaginary parts by DTL_get_complex_parts()
 * according to dt_type. Record layout:
 *   dt_type (lower-cased) trans rows cols alpha_real alpha_imag lda ldb
 *
 * The record carries no trailing newline.
 *
 * filename is accepted but not used; function_name is passed twice to
 * DTL_Trace().
 */
void AOCL_DTL_log_matcopy_sizes(int8 loglevel,
                                char dt_type,
                                const f77_char trans,
                                const f77_int rows,
                                const f77_int cols,
                                const void* alpha,
                                const f77_int lda,
                                const f77_int ldb,
                                const char* filename,
                                const char* function_name,
                                int line)
{
    char buffer[256];
    double alpha_real = 0.0;
    double alpha_imag = 0.0;

    DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);

    // Ordering as per cblas/blas interfaces
    // {S, D, C, Z} trans, rows, cols, alpha_real, alpha_imag,
    //              lda, ldb
    //
    // snprintf (not sprintf): "%lf" of a large-magnitude scalar can expand
    // to 300+ characters and would overrun buffer; truncate instead.
    snprintf(buffer, sizeof(buffer),
             "%c %c %ld %ld %lf %lf %ld %ld",
             tolower((unsigned char)dt_type), trans,
             (dim_t)rows, (dim_t)cols,
             alpha_real, alpha_imag,
             (inc_t)lda, (inc_t)ldb);

    AOCL_DTL_START_PERF_TIMER();
    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
}
/*
 * Logs the input parameters of a strided matrix copy/transpose call
 * (?omatcopy2) and starts the DTL performance timer.
 *
 * alpha is split into real/imaginary parts by DTL_get_complex_parts()
 * according to dt_type. Record layout:
 *   dt_type (lower-cased) trans rows cols alpha_real alpha_imag
 *   lda stridea ldb strideb
 *
 * The record carries no trailing newline.
 *
 * filename is accepted but not used; function_name is passed twice to
 * DTL_Trace().
 */
void AOCL_DTL_log_matcopy2_sizes(int8 loglevel,
                                 char dt_type,
                                 const f77_char trans,
                                 const f77_int rows,
                                 const f77_int cols,
                                 const void* alpha,
                                 const f77_int lda,
                                 const f77_int stridea,
                                 const f77_int ldb,
                                 const f77_int strideb,
                                 const char* filename,
                                 const char* function_name,
                                 int line)
{
    char buffer[256];
    double alpha_real = 0.0;
    double alpha_imag = 0.0;

    DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);

    // Ordering as per cblas/blas interfaces
    // {S, D, C, Z} trans, rows, cols, alpha_real, alpha_imag,
    //              lda, stridea, ldb, strideb
    //
    // snprintf (not sprintf): "%lf" of a large-magnitude scalar can expand
    // to 300+ characters and would overrun buffer; truncate instead.
    snprintf(buffer, sizeof(buffer),
             "%c %c %ld %ld %lf %lf %ld %ld %ld %ld",
             tolower((unsigned char)dt_type), trans,
             (dim_t)rows, (dim_t)cols,
             alpha_real, alpha_imag,
             (inc_t)lda, (inc_t)stridea,
             (inc_t)ldb, (inc_t)strideb);

    AOCL_DTL_START_PERF_TIMER();
    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
}
#endif

View File

@@ -186,6 +186,35 @@ void AOCL_DTL_log_trsm_stats(int8 loglevel,
// Level-3 Extension Logging
// Logs the GEMM3M inputs (data type, transposes, m/n/k, alpha, beta and
// the leading dimensions) as one trace record and starts the DTL
// performance timer. alpha and beta point to scalars of the type selected
// by dt_type. filename, function_name and line identify the call site.
void AOCL_DTL_log_gemm3m_sizes(int8 loglevel,
char dt_type,
const f77_char transa,
const f77_char transb,
const f77_int m,
const f77_int n,
const f77_int k,
const void *alpha,
const f77_int lda,
const f77_int ldb,
const void *beta,
const f77_int ldc,
const char *filename,
const char *function_name,
int line);
// Logs thread count, elapsed time and GFLOPS for a GEMM3M call.
// The operation count is derived from m, n and k, and is scaled up for
// complex dt_type values.
void AOCL_DTL_log_gemm3m_stats(int8 loglevel,
char dt_type,
const f77_int m,
const f77_int n,
const f77_int k);
// Logs the GEMM_BATCH inputs: the data type letter and group_count.
// filename, function_name and line identify the call site.
void AOCL_DTL_log_gemm_batch_sizes(int8 loglevel,
char dt_type,
const f77_int group_count,
const char *filename,
const char *function_name,
int line);
void AOCL_DTL_log_gemm_get_size_sizes(int8 loglevel,
char dt_type,
const f77_char identifer,
@@ -560,6 +589,14 @@ void AOCL_DTL_log_rotmg_sizes(int8 loglevel,
// Level-1 Logging
// Logs the IxAMIN inputs: the data type letter, vector length n and
// stride incx. filename, function_name and line identify the call site.
void AOCL_DTL_log_amin_sizes(int8 loglevel,
char dt_type,
const f77_int n,
const f77_int incx,
const char* filename,
const char* function_name,
int line);
void AOCL_DTL_log_amax_sizes(int8 loglevel,
char dt_type,
const f77_int n,
@@ -648,6 +685,49 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
const char* function_name,
int line);
// Matrix Copy and Transpose Logging
// Logs the ?omatadd inputs (data type, transposes, m/n, alpha, lda, beta,
// ldb, ldc) as one trace record and starts the DTL performance timer.
// alpha and beta point to scalars of the type selected by dt_type.
void AOCL_DTL_log_matadd_sizes(int8 loglevel,
char dt_type,
const f77_char transa,
const f77_char transb,
const f77_int m,
const f77_int n,
const void* alpha,
const f77_int lda,
const void* beta,
const f77_int ldb,
const f77_int ldc,
const char* filename,
const char* function_name,
int line);
// Logs the matrix copy/transpose inputs (data type, trans, rows, cols,
// alpha, lda, ldb) as one trace record and starts the DTL performance
// timer. alpha points to a scalar of the type selected by dt_type.
void AOCL_DTL_log_matcopy_sizes(int8 loglevel,
char dt_type,
const f77_char trans,
const f77_int rows,
const f77_int cols,
const void* alpha,
const f77_int lda,
const f77_int ldb,
const char* filename,
const char* function_name,
int line);
// Logs the strided matrix copy/transpose inputs (data type, trans, rows,
// cols, alpha, lda, stridea, ldb, strideb) as one trace record and starts
// the DTL performance timer. alpha points to a scalar of the type
// selected by dt_type.
void AOCL_DTL_log_matcopy2_sizes(int8 loglevel,
char dt_type,
const f77_char trans,
const f77_int rows,
const f77_int cols,
const void* alpha,
const f77_int lda,
const f77_int stridea,
const f77_int ldb,
const f77_int strideb,
const char* filename,
const char* function_name,
int line);
// Level-3 Macros
#define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc) \
@@ -714,6 +794,20 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
// Level-3 Extension Macros
// Call-site wrappers for the GEMM3M / GEMM_BATCH loggers. Each expands to
// an unbraced `if (gbIsLoggingEnabled) <call>;` statement, so:
//  - the logger is only called when gbIsLoggingEnabled is non-zero;
//  - the expansion already ends in ';' (a trailing ';' at the call site
//    is optional);
//  - do not use these as the body of an unbraced if/else.
// The *_INPUTS variants pass __FILE__, __FUNCTION__ and __LINE__ of the
// call site to the logger.
#define AOCL_DTL_LOG_GEMM3M_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_gemm3m_sizes(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc, \
__FILE__, __FUNCTION__, __LINE__);
#define AOCL_DTL_LOG_GEMM3M_STATS(loglevel, dt_type, m, n, k) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_gemm3m_stats(loglevel, dt_type, m, n, k);
#define AOCL_DTL_LOG_GEMM_BATCH_INPUTS(loglevel, dt, group_count) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_gemm_batch_sizes(loglevel, dt, group_count, \
__FILE__, __FUNCTION__, __LINE__);
#define AOCL_DTL_LOG_GEMM_GET_SIZE_INPUTS(loglevel, dt, identifier, m, n, k) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_gemm_get_size_sizes(loglevel, dt, identifier, m, n, k, \
@@ -871,6 +965,10 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
// Level-1 Macros
// Call-site wrappers for the IxAMIN / IxAMAX input loggers. Each expands
// to an unbraced `if (gbIsLoggingEnabled) <call>;` that forwards the
// arguments plus __FILE__, __FUNCTION__ and __LINE__ of the call site.
// The expansion already ends in ';'.
#define AOCL_DTL_LOG_AMIN_INPUTS(loglevel, dt_type, n, incx) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_amin_sizes(loglevel, dt_type, n, incx, __FILE__, __FUNCTION__, __LINE__);
#define AOCL_DTL_LOG_AMAX_INPUTS(loglevel, dt_type, n, incx) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_amax_sizes(loglevel, dt_type, n, incx, __FILE__, __FUNCTION__, __LINE__);
@@ -913,6 +1011,19 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
if (gbIsLoggingEnabled) \
AOCL_DTL_log_swap_sizes(loglevel, dt_type, n, incx, incy, __FILE__,__FUNCTION__,__LINE__);
// Matrix Copy and Transpose Macros
// Call-site wrappers for the matrix add / copy / strided-copy input
// loggers. Each expands to an unbraced `if (gbIsLoggingEnabled) <call>;`
// that forwards the arguments plus __FILE__, __FUNCTION__ and __LINE__ of
// the call site. The expansion already ends in ';'; do not use these as
// the body of an unbraced if/else.
#define AOCL_DTL_LOG_MATADD_INPUTS(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc ) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_matadd_sizes(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc, __FILE__,__FUNCTION__,__LINE__);
#define AOCL_DTL_LOG_MATCOPY_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb ) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_matcopy_sizes(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb, __FILE__,__FUNCTION__,__LINE__);
#define AOCL_DTL_LOG_MATCOPY2_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb ) \
if (gbIsLoggingEnabled) \
AOCL_DTL_log_matcopy2_sizes(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb, __FILE__,__FUNCTION__,__LINE__);
#else // AOCL_DTL_LOG_ENABLE
@@ -946,6 +1057,12 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
// Level-3 Extension Macros
// Logging-disabled variants: these expand to nothing, so the macro
// arguments are not evaluated at the call site.
#define AOCL_DTL_LOG_GEMM3M_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc)
#define AOCL_DTL_LOG_GEMM3M_STATS(loglevel, dt_type, m, n, k)
#define AOCL_DTL_LOG_GEMM_BATCH_INPUTS(loglevel, dt, group_count)
#define AOCL_DTL_LOG_GEMM_GET_SIZE_INPUTS(loglevel, dt, identifier, m, n, k)
#define AOCL_DTL_LOG_GEMM_PACK_INPUTS(loglevel, dt, identifier, trans, m, n, k, alpha, pld)
@@ -1016,6 +1133,8 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
// Level-1 Macros
// Logging-disabled variants: these expand to nothing, so the macro
// arguments are not evaluated at the call site.
#define AOCL_DTL_LOG_AMIN_INPUTS(loglevel, dt_type, n, incx)
#define AOCL_DTL_LOG_AMAX_INPUTS(loglevel, dt_type, n, incx)
#define AOCL_DTL_LOG_ASUM_INPUTS(loglevel, dt_type, n, incx)
@@ -1036,6 +1155,14 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
// Logging-disabled variants: these expand to nothing, so the macro
// arguments are not evaluated at the call site.
#define AOCL_DTL_LOG_SWAP_INPUTS(loglevel, dt_type, n, incx, incy)
// Matrix Copy and Transpose Macros
#define AOCL_DTL_LOG_MATADD_INPUTS(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc )
#define AOCL_DTL_LOG_MATCOPY_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb )
#define AOCL_DTL_LOG_MATCOPY2_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb )
#endif // AOCL_DTL_LOG_ENABLE
#endif //ndef __AOCLDTL_BLIS_H

View File

@@ -96,10 +96,10 @@ f77_int PASTEF772S(i,chx,blasname) \
integer size, that typecast occurs here. */ \
f77_index = bli_index + 1; \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
return f77_index; \
}\
\

View File

@@ -47,6 +47,12 @@ f77_int PASTEF772S(i,chx,blasname) \
const ftype_x* x, const f77_int* incx \
) \
{ \
/* Initialize BLIS. */ \
bli_init_auto(); \
\
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
AOCL_DTL_LOG_AMIN_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(chx), *n, *incx) \
\
dim_t n0; \
ftype_x* x0; \
inc_t incx0; \
@@ -57,10 +63,10 @@ f77_int PASTEF772S(i,chx,blasname) \
is needed to emulate netlib BLAS. Without it, bli_?aminv() will
return 0, which ends up getting incremented to 1 (below) before
being returned, which is not what we want. */ \
if ( *n < 1 || *incx <= 0 ) return 0; \
\
/* Initialize BLIS. */ \
bli_init_auto(); \
if ( *n < 1 || *incx <= 0 ) { \
AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "iamin_: vector empty") \
return 0; \
}\
\
/* Convert/typecast negative values of n to zero. */ \
bli_convert_blas_dim1( *n, n0 ); \
@@ -84,6 +90,7 @@ f77_int PASTEF772S(i,chx,blasname) \
integer size, that typecast occurs here. */ \
f77_index = bli_index + 1; \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
\

View File

@@ -70,7 +70,7 @@ void PASTEF77S(ch,blasname) \
\
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
\
/* Perform BLAS parameter checking. */ \
PASTEBLACHK(blasname) \
@@ -214,7 +214,7 @@ void PASTEF77S(ch,blasname) \
\
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
\
/* Perform BLAS parameter checking. */ \
PASTEBLACHK(blasname) \

View File

@@ -69,6 +69,8 @@ void PASTEF77S(ch,blasname) \
bli_init_auto(); \
\
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
AOCL_DTL_LOG_GEMM3M_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
\
/* Perform BLAS parameter checking. */ \
PASTEBLACHK(blasname) \
@@ -89,6 +91,7 @@ void PASTEF77S(ch,blasname) \
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
@@ -145,6 +148,7 @@ void PASTEF77S(ch,blasname) \
); \
} \
\
AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
@@ -195,6 +199,8 @@ void PASTEF77S(ch,blasname) \
bli_init_auto(); \
\
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
AOCL_DTL_LOG_GEMM3M_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
\
/* Perform BLAS parameter checking. */ \
PASTEBLACHK(blasname) \
@@ -215,6 +221,7 @@ void PASTEF77S(ch,blasname) \
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
@@ -293,6 +300,7 @@ void PASTEF77S(ch,blasname) \
} \
\
\
AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \
/* Finalize BLIS. */ \
bli_finalize_auto(); \

View File

@@ -165,9 +165,13 @@ void simatcopy_blis_impl
f77_int* ldb
)
{
//printf("I am from simatcopy_\n");
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
(void*)alpha, *lda, *ldb );
if ( !( *trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -251,8 +255,13 @@ void dimatcopy_blis_impl
f77_int* ldb
)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
(void*)alpha, *lda, *ldb );
if ( !( *trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -336,8 +345,13 @@ void cimatcopy_blis_impl
f77_int* ldb
)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
(void*)alpha, *lda, *ldb );
if ( !( *trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -421,8 +435,13 @@ void zimatcopy_blis_impl
f77_int* ldb
)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
(void*)alpha, *lda, *ldb );
if ( !( *trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||

View File

@@ -96,8 +96,12 @@ static void bli_zconjugate(dcomplex* A,dim_t cols,dim_t rows)
void somatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *transa, *transb, *m, *n,
(void*)alpha, *lda, (void*)beta, *ldb, *ldc );
if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
{
@@ -175,8 +179,13 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
void domatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *transa, *transb, *m, *n,
(void*)alpha, *lda, (void*)beta, *ldb, *ldc );
if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
{
bli_print_msg( " Invalid function parameters domatadd_() .", __FILE__, __LINE__ );
@@ -253,8 +262,13 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
void comatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *transa, *transb, *m, *n,
(void*)alpha, *lda, (void*)beta, *ldb, *ldc );
if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
{
bli_print_msg( " Invalid function parameters comatadd_() .", __FILE__, __LINE__ );
@@ -344,8 +358,13 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
void zomatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda,const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *transa, *transb, *m, *n,
(void*)alpha, *lda, (void*)beta, *ldb, *ldc );
if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
{
bli_print_msg( " Invalid function parameters zomatadd_() .", __FILE__, __LINE__ );

View File

@@ -60,7 +60,14 @@ static dim_t bli_zoMatCopy_cc(dim_t rows, dim_t cols, const dcomplex alpha, cons
void somatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
(void*)alpha, *lda, *ldb );
if ( !(*trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -108,8 +115,13 @@ void somatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alp
void domatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
(void*)alpha, *lda, *ldb );
if ( !(*trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -157,8 +169,13 @@ void domatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* al
void comatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
(void*)alpha, *lda, *ldb );
if ( !(*trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -207,8 +224,13 @@ void comatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex*
void zomatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
(void*)alpha, *lda, *ldb );
if ( !(*trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||

View File

@@ -204,8 +204,13 @@ void somatcopy2_blis_impl
f77_int* strideb
)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
(void*)alpha, *lda, *stridea, *ldb, *strideb );
if ( !(*trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -296,8 +301,13 @@ void domatcopy2_blis_impl
f77_int* strideb
)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
(void*)alpha, *lda, *stridea, *ldb, *strideb );
if ( !( *trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -388,8 +398,13 @@ void comatcopy2_blis_impl
f77_int* strideb
)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
(void*)alpha, *lda, *stridea, *ldb, *strideb );
if ( !( *trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||
@@ -480,8 +495,13 @@ void zomatcopy2_blis_impl
f77_int* strideb
)
{
/* Initialize BLIS. */
// Call to bli_init_auto() is not needed here
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
(void*)alpha, *lda, *stridea, *ldb, *strideb );
if ( !( *trans == 'n' || *trans == 'N' ||
*trans == 't' || *trans == 'T' ||
*trans == 'c' || *trans == 'C' ||

View File

@@ -69,6 +69,9 @@ void PASTEF77S(ch,blasname) \
\
/* Initialize BLIS. */ \
bli_init_auto(); \
\
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
AOCL_DTL_LOG_GEMM_BATCH_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *group_count); \
\
/* Perform BLAS parameter checking. */ \
f77_int count; \
@@ -133,6 +136,8 @@ void PASTEF77S(ch,blasname) \
} \
} \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
} \
IF_BLIS_ENABLE_BLAS(\
@@ -184,6 +189,9 @@ void PASTEF77S(ch,blasname) \
\
/* Initialize BLIS. */ \
bli_init_auto(); \
\
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
AOCL_DTL_LOG_GEMM_BATCH_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *group_count); \
\
/* Perform BLAS parameter checking. */ \
f77_int count; \
@@ -266,6 +274,7 @@ void PASTEF77S(ch,blasname) \
} \
} \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
} \