mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
DTL logging fixes and improvements (5)
More improvements to DTL coverage and coding:
- Expand logging and tracing coverage to IxAMIN and GEMM_BATCH APIs
- Expand logging and performance stats to GEMM3M APIs
- Expand logging coverage to matrix copy, transpose and add APIs
- Misc tidying of code

AMD-Internal: [CPUPL-7010]
This commit is contained in:
@@ -478,6 +478,95 @@ void AOCL_DTL_log_trsm_stats(int8 loglevel,
|
||||
|
||||
// Level-3 Extension Logging
|
||||
|
||||
void AOCL_DTL_log_gemm3m_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char transa,
|
||||
const f77_char transb,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const f77_int k,
|
||||
const void *alpha,
|
||||
const f77_int lda,
|
||||
const f77_int ldb,
|
||||
const void *beta,
|
||||
const f77_int ldc,
|
||||
const char *filename,
|
||||
const char *function_name,
|
||||
int line)
|
||||
{
|
||||
char buffer[256];
|
||||
|
||||
double alpha_real = 0.0;
|
||||
double alpha_imag = 0.0;
|
||||
double beta_real = 0.0;
|
||||
double beta_imag = 0.0;
|
||||
|
||||
DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
|
||||
DTL_get_complex_parts(dt_type, beta, &beta_real, &beta_imag);
|
||||
|
||||
// Ordering as per cblas/blas interfaces
|
||||
// {S, D, C, Z} transa, transb, m, n, k, alpha_real, alpha_imag,
|
||||
// lda, ldb, beta_real, beta_imag, ldc
|
||||
sprintf(buffer, "%c %c %c %ld %ld %ld %lf %lf %ld %ld %lf %lf %ld",
|
||||
tolower(dt_type),
|
||||
transa, transb,
|
||||
(dim_t)m, (dim_t)n, (dim_t)k,
|
||||
alpha_real, alpha_imag,
|
||||
(inc_t)lda, (inc_t)ldb,
|
||||
beta_real, beta_imag,
|
||||
(inc_t)ldc);
|
||||
|
||||
AOCL_DTL_START_PERF_TIMER();
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
}
|
||||
|
||||
void AOCL_DTL_log_gemm3m_stats(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const f77_int k)
|
||||
{
|
||||
char buffer[256];
|
||||
|
||||
// Execution time is in micro seconds.
|
||||
Double execution_time = AOCL_DTL_get_time_spent();
|
||||
|
||||
double flops = 2.0 * m * n * k;
|
||||
if (dt_type == 'c' || dt_type == 'C' || dt_type == 'z' || dt_type == 'Z')
|
||||
{
|
||||
flops = 4.0 * flops;
|
||||
}
|
||||
|
||||
if (execution_time != 0.0)
|
||||
sprintf(buffer, " nt=%ld %.3f ms %0.3f GFLOPS",
|
||||
AOCL_get_requested_threads_count(),
|
||||
execution_time/1000.0,
|
||||
flops/(execution_time * 1e3));
|
||||
else
|
||||
sprintf(buffer, " nt=%ld %.3f ms",
|
||||
AOCL_get_requested_threads_count(),
|
||||
execution_time/1000.0);
|
||||
|
||||
DTL_Trace(loglevel, TRACE_TYPE_RAW, NULL, NULL, 0, buffer);
|
||||
}
|
||||
|
||||
void AOCL_DTL_log_gemm_batch_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int group_count,
|
||||
const char *filename,
|
||||
const char *function_name,
|
||||
int line)
|
||||
{
|
||||
char buffer[256];
|
||||
|
||||
// Ordering as per cblas/blas interfaces
|
||||
// {S, D, C, Z} identifier, group_count
|
||||
sprintf(buffer, "%c %ld\n", tolower(dt_type),
|
||||
(dim_t)group_count);
|
||||
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
}
|
||||
|
||||
void AOCL_DTL_log_gemm_get_size_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char identifier,
|
||||
@@ -1288,6 +1377,21 @@ void AOCL_DTL_log_rotmg_sizes(int8 loglevel,
|
||||
|
||||
// Level-1 Logging
|
||||
|
||||
void AOCL_DTL_log_amin_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int n,
|
||||
const f77_int incx,
|
||||
const char *filename,
|
||||
const char *function_name,
|
||||
int line)
|
||||
{
|
||||
char buffer[256];
|
||||
// {S, D, C, Z} {n, incx}
|
||||
sprintf(buffer, "%c %ld %ld\n", tolower(dt_type), (dim_t)n, (dim_t)incx);
|
||||
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
}
|
||||
|
||||
void AOCL_DTL_log_amax_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int n,
|
||||
@@ -1486,4 +1590,115 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
}
|
||||
|
||||
// Matrix Copy and Transpose Logging
|
||||
|
||||
void AOCL_DTL_log_matadd_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char transa,
|
||||
const f77_char transb,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const void* alpha,
|
||||
const f77_int lda,
|
||||
const void* beta,
|
||||
const f77_int ldb,
|
||||
const f77_int ldc,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line)
|
||||
{
|
||||
char buffer[256];
|
||||
|
||||
double alpha_real = 0.0;
|
||||
double alpha_imag = 0.0;
|
||||
double beta_real = 0.0;
|
||||
double beta_imag = 0.0;
|
||||
|
||||
DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
|
||||
DTL_get_complex_parts(dt_type, beta, &beta_real, &beta_imag);
|
||||
|
||||
// Ordering as per cblas/blas interfaces
|
||||
// {S, D, C, Z} transa, transb, m, n, alpha_real, alpha_imag,
|
||||
// lda, beta_real, beta_imag, ldb, ldc
|
||||
sprintf(buffer, "%c %c %c %ld %ld %lf %lf %ld %lf %lf %ld %ld",
|
||||
tolower(dt_type),
|
||||
transa, transb,
|
||||
(dim_t)m, (dim_t)n,
|
||||
alpha_real, alpha_imag, (inc_t)lda,
|
||||
beta_real, beta_imag, (inc_t)ldb,
|
||||
(inc_t)ldc);
|
||||
|
||||
AOCL_DTL_START_PERF_TIMER();
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void AOCL_DTL_log_matcopy_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char trans,
|
||||
const f77_int rows,
|
||||
const f77_int cols,
|
||||
const void* alpha,
|
||||
const f77_int lda,
|
||||
const f77_int ldb,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line)
|
||||
{
|
||||
char buffer[256];
|
||||
|
||||
double alpha_real = 0.0;
|
||||
double alpha_imag = 0.0;
|
||||
|
||||
DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
|
||||
|
||||
// Ordering as per cblas/blas interfaces
|
||||
// {S, D, C, Z} trans, rows, cols, alpha_real, alpha_imag,
|
||||
// lda, ldb
|
||||
sprintf(buffer, "%c %c %ld %ld %lf %lf %ld %ld",
|
||||
tolower(dt_type), trans,
|
||||
(dim_t)rows, (dim_t)cols,
|
||||
alpha_real, alpha_imag,
|
||||
(inc_t)lda, (inc_t)ldb);
|
||||
|
||||
AOCL_DTL_START_PERF_TIMER();
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
}
|
||||
|
||||
void AOCL_DTL_log_matcopy2_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char trans,
|
||||
const f77_int rows,
|
||||
const f77_int cols,
|
||||
const void* alpha,
|
||||
const f77_int lda,
|
||||
const f77_int stridea,
|
||||
const f77_int ldb,
|
||||
const f77_int strideb,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line)
|
||||
{
|
||||
char buffer[256];
|
||||
|
||||
double alpha_real = 0.0;
|
||||
double alpha_imag = 0.0;
|
||||
|
||||
DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
|
||||
|
||||
// Ordering as per cblas/blas interfaces
|
||||
// {S, D, C, Z} trans, rows, cols, alpha_real, alpha_imag,
|
||||
// lda, stridea, ldb, strideb
|
||||
sprintf(buffer, "%c %c %ld %ld %lf %lf %ld %ld %ld %ld",
|
||||
tolower(dt_type), trans,
|
||||
(dim_t)rows, (dim_t)cols,
|
||||
alpha_real, alpha_imag,
|
||||
(inc_t)lda, (inc_t)stridea,
|
||||
(inc_t)ldb, (inc_t)strideb);
|
||||
|
||||
AOCL_DTL_START_PERF_TIMER();
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -186,6 +186,35 @@ void AOCL_DTL_log_trsm_stats(int8 loglevel,
|
||||
|
||||
// Level-3 Extension Logging
|
||||
|
||||
void AOCL_DTL_log_gemm3m_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char transa,
|
||||
const f77_char transb,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const f77_int k,
|
||||
const void *alpha,
|
||||
const f77_int lda,
|
||||
const f77_int ldb,
|
||||
const void *beta,
|
||||
const f77_int ldc,
|
||||
const char *filename,
|
||||
const char *function_name,
|
||||
int line);
|
||||
|
||||
void AOCL_DTL_log_gemm3m_stats(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const f77_int k);
|
||||
|
||||
void AOCL_DTL_log_gemm_batch_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int group_count,
|
||||
const char *filename,
|
||||
const char *function_name,
|
||||
int line);
|
||||
|
||||
void AOCL_DTL_log_gemm_get_size_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char identifer,
|
||||
@@ -560,6 +589,14 @@ void AOCL_DTL_log_rotmg_sizes(int8 loglevel,
|
||||
|
||||
// Level-1 Logging
|
||||
|
||||
void AOCL_DTL_log_amin_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int n,
|
||||
const f77_int incx,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line);
|
||||
|
||||
void AOCL_DTL_log_amax_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int n,
|
||||
@@ -648,6 +685,49 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
|
||||
const char* function_name,
|
||||
int line);
|
||||
|
||||
// Matrix Copy and Transpose Logging
|
||||
|
||||
void AOCL_DTL_log_matadd_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char transa,
|
||||
const f77_char transb,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const void* alpha,
|
||||
const f77_int lda,
|
||||
const void* beta,
|
||||
const f77_int ldb,
|
||||
const f77_int ldc,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line);
|
||||
|
||||
void AOCL_DTL_log_matcopy_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char trans,
|
||||
const f77_int rows,
|
||||
const f77_int cols,
|
||||
const void* alpha,
|
||||
const f77_int lda,
|
||||
const f77_int ldb,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line);
|
||||
|
||||
void AOCL_DTL_log_matcopy2_sizes(int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_char trans,
|
||||
const f77_int rows,
|
||||
const f77_int cols,
|
||||
const void* alpha,
|
||||
const f77_int lda,
|
||||
const f77_int stridea,
|
||||
const f77_int ldb,
|
||||
const f77_int strideb,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line);
|
||||
|
||||
// Level-3 Macros
|
||||
|
||||
#define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc) \
|
||||
@@ -714,6 +794,20 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
|
||||
|
||||
// Level-3 Extension Macros
|
||||
|
||||
// Each macro forwards to its logger only when gbIsLoggingEnabled is set,
// adding the call site's __FILE__/__FUNCTION__/__LINE__ where the logger
// takes them.
// NOTE: each expands to an unbraced `if (...) call;` including the
// trailing semicolon, so avoid using them as the body of an if/else.

#define AOCL_DTL_LOG_GEMM3M_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc) \
    if (gbIsLoggingEnabled) \
        AOCL_DTL_log_gemm3m_sizes(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc, \
                                  __FILE__, __FUNCTION__, __LINE__);

#define AOCL_DTL_LOG_GEMM3M_STATS(loglevel, dt_type, m, n, k) \
    if (gbIsLoggingEnabled) \
        AOCL_DTL_log_gemm3m_stats(loglevel, dt_type, m, n, k);

#define AOCL_DTL_LOG_GEMM_BATCH_INPUTS(loglevel, dt, group_count) \
    if (gbIsLoggingEnabled) \
        AOCL_DTL_log_gemm_batch_sizes(loglevel, dt, group_count, \
                                      __FILE__, __FUNCTION__, __LINE__);
|
||||
|
||||
#define AOCL_DTL_LOG_GEMM_GET_SIZE_INPUTS(loglevel, dt, identifier, m, n, k) \
|
||||
if (gbIsLoggingEnabled) \
|
||||
AOCL_DTL_log_gemm_get_size_sizes(loglevel, dt, identifier, m, n, k, \
|
||||
@@ -871,6 +965,10 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
|
||||
|
||||
// Level-1 Macros
|
||||
|
||||
// Log IxAMIN / IxAMAX inputs when logging is enabled.
// NOTE: each expands to an unbraced `if (...) call;` including the
// trailing semicolon, so avoid using them as the body of an if/else.

#define AOCL_DTL_LOG_AMIN_INPUTS(loglevel, dt_type, n, incx) \
    if (gbIsLoggingEnabled) \
        AOCL_DTL_log_amin_sizes(loglevel, dt_type, n, incx, __FILE__, __FUNCTION__, __LINE__);

#define AOCL_DTL_LOG_AMAX_INPUTS(loglevel, dt_type, n, incx) \
    if (gbIsLoggingEnabled) \
        AOCL_DTL_log_amax_sizes(loglevel, dt_type, n, incx, __FILE__, __FUNCTION__, __LINE__);
|
||||
@@ -913,6 +1011,19 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
|
||||
if (gbIsLoggingEnabled) \
|
||||
AOCL_DTL_log_swap_sizes(loglevel, dt_type, n, incx, incy, __FILE__,__FUNCTION__,__LINE__);
|
||||
|
||||
// Matrix Copy and Transpose Macros
|
||||
|
||||
// Log matrix add / copy / copy2 inputs when logging is enabled.
// NOTE: each expands to an unbraced `if (...) call;` including the
// trailing semicolon, so avoid using them as the body of an if/else.

#define AOCL_DTL_LOG_MATADD_INPUTS(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc ) \
    if (gbIsLoggingEnabled) \
        AOCL_DTL_log_matadd_sizes(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc, __FILE__,__FUNCTION__,__LINE__);

#define AOCL_DTL_LOG_MATCOPY_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb ) \
    if (gbIsLoggingEnabled) \
        AOCL_DTL_log_matcopy_sizes(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb, __FILE__,__FUNCTION__,__LINE__);

#define AOCL_DTL_LOG_MATCOPY2_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb ) \
    if (gbIsLoggingEnabled) \
        AOCL_DTL_log_matcopy2_sizes(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb, __FILE__,__FUNCTION__,__LINE__);
|
||||
|
||||
#else // AOCL_DTL_LOG_ENABLE
|
||||
|
||||
@@ -946,6 +1057,12 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
|
||||
|
||||
// Level-3 Extension Macros
|
||||
|
||||
// Logging disabled: these macros expand to nothing.

#define AOCL_DTL_LOG_GEMM3M_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc)

#define AOCL_DTL_LOG_GEMM3M_STATS(loglevel, dt_type, m, n, k)

#define AOCL_DTL_LOG_GEMM_BATCH_INPUTS(loglevel, dt, group_count)

#define AOCL_DTL_LOG_GEMM_GET_SIZE_INPUTS(loglevel, dt, identifier, m, n, k)

#define AOCL_DTL_LOG_GEMM_PACK_INPUTS(loglevel, dt, identifier, trans, m, n, k, alpha, pld)
|
||||
@@ -1016,6 +1133,8 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
|
||||
|
||||
// Level-1 Macros
|
||||
|
||||
// Logging disabled: these macros expand to nothing.

#define AOCL_DTL_LOG_AMIN_INPUTS(loglevel, dt_type, n, incx)

#define AOCL_DTL_LOG_AMAX_INPUTS(loglevel, dt_type, n, incx)

#define AOCL_DTL_LOG_ASUM_INPUTS(loglevel, dt_type, n, incx)
|
||||
@@ -1036,6 +1155,14 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
|
||||
|
||||
#define AOCL_DTL_LOG_SWAP_INPUTS(loglevel, dt_type, n, incx, incy)
|
||||
|
||||
// Matrix Copy and Transpose Macros
|
||||
|
||||
// Logging disabled: these macros expand to nothing.

#define AOCL_DTL_LOG_MATADD_INPUTS(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc )

#define AOCL_DTL_LOG_MATCOPY_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb )

#define AOCL_DTL_LOG_MATCOPY2_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb )
|
||||
|
||||
#endif // AOCL_DTL_LOG_ENABLE
|
||||
|
||||
#endif //ndef __AOCLDTL_BLIS_H
|
||||
|
||||
@@ -96,10 +96,10 @@ f77_int PASTEF772S(i,chx,blasname) \
|
||||
integer size, that typecast occurs here. */ \
|
||||
f77_index = bli_index + 1; \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
return f77_index; \
|
||||
}\
|
||||
\
|
||||
|
||||
@@ -47,6 +47,12 @@ f77_int PASTEF772S(i,chx,blasname) \
|
||||
const ftype_x* x, const f77_int* incx \
|
||||
) \
|
||||
{ \
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
AOCL_DTL_LOG_AMIN_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(chx), *n, *incx) \
|
||||
\
|
||||
dim_t n0; \
|
||||
ftype_x* x0; \
|
||||
inc_t incx0; \
|
||||
@@ -57,10 +63,10 @@ f77_int PASTEF772S(i,chx,blasname) \
|
||||
is needed to emulate netlib BLAS. Without it, bli_?aminv() will
|
||||
return 0, which ends up getting incremented to 1 (below) before
|
||||
being returned, which is not what we want. */ \
|
||||
if ( *n < 1 || *incx <= 0 ) return 0; \
|
||||
\
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
if ( *n < 1 || *incx <= 0 ) { \
|
||||
AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "iamin_: vector empty") \
|
||||
return 0; \
|
||||
}\
|
||||
\
|
||||
/* Convert/typecast negative values of n to zero. */ \
|
||||
bli_convert_blas_dim1( *n, n0 ); \
|
||||
@@ -84,6 +90,7 @@ f77_int PASTEF772S(i,chx,blasname) \
|
||||
integer size, that typecast occurs here. */ \
|
||||
f77_index = bli_index + 1; \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
\
|
||||
|
||||
@@ -70,7 +70,7 @@ void PASTEF77S(ch,blasname) \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
\
|
||||
/* Perform BLAS parameter checking. */ \
|
||||
PASTEBLACHK(blasname) \
|
||||
@@ -214,7 +214,7 @@ void PASTEF77S(ch,blasname) \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
\
|
||||
/* Perform BLAS parameter checking. */ \
|
||||
PASTEBLACHK(blasname) \
|
||||
|
||||
@@ -69,6 +69,8 @@ void PASTEF77S(ch,blasname) \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
AOCL_DTL_LOG_GEMM3M_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
\
|
||||
/* Perform BLAS parameter checking. */ \
|
||||
PASTEBLACHK(blasname) \
|
||||
@@ -89,6 +91,7 @@ void PASTEF77S(ch,blasname) \
|
||||
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
|
||||
&& PASTEMAC(ch,eq1)( *beta ) )) \
|
||||
{ \
|
||||
AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
@@ -145,6 +148,7 @@ void PASTEF77S(ch,blasname) \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
@@ -195,6 +199,8 @@ void PASTEF77S(ch,blasname) \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
AOCL_DTL_LOG_GEMM3M_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
\
|
||||
/* Perform BLAS parameter checking. */ \
|
||||
PASTEBLACHK(blasname) \
|
||||
@@ -215,6 +221,7 @@ void PASTEF77S(ch,blasname) \
|
||||
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
|
||||
&& PASTEMAC(ch,eq1)( *beta ) )) \
|
||||
{ \
|
||||
AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
@@ -293,6 +300,7 @@ void PASTEF77S(ch,blasname) \
|
||||
} \
|
||||
\
|
||||
\
|
||||
AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
|
||||
@@ -165,9 +165,13 @@ void simatcopy_blis_impl
|
||||
f77_int* ldb
|
||||
)
|
||||
{
|
||||
//printf("I am from simatcopy_\n");
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *ldb );
|
||||
|
||||
if ( !( *trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -251,8 +255,13 @@ void dimatcopy_blis_impl
|
||||
f77_int* ldb
|
||||
)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *ldb );
|
||||
|
||||
if ( !( *trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -336,8 +345,13 @@ void cimatcopy_blis_impl
|
||||
f77_int* ldb
|
||||
)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *ldb );
|
||||
|
||||
if ( !( *trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -421,8 +435,13 @@ void zimatcopy_blis_impl
|
||||
f77_int* ldb
|
||||
)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *ldb );
|
||||
|
||||
if ( !( *trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
|
||||
@@ -96,8 +96,12 @@ static void bli_zconjugate(dcomplex* A,dim_t cols,dim_t rows)
|
||||
|
||||
void somatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *transa, *transb, *m, *n,
|
||||
(void*)alpha, *lda, (void*)beta, *ldb, *ldc );
|
||||
|
||||
if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
|
||||
{
|
||||
@@ -175,8 +179,13 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
|
||||
|
||||
void domatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *transa, *transb, *m, *n,
|
||||
(void*)alpha, *lda, (void*)beta, *ldb, *ldc );
|
||||
|
||||
if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
|
||||
{
|
||||
bli_print_msg( " Invalid function parameters domatadd_() .", __FILE__, __LINE__ );
|
||||
@@ -253,8 +262,13 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
|
||||
|
||||
void comatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *transa, *transb, *m, *n,
|
||||
(void*)alpha, *lda, (void*)beta, *ldb, *ldc );
|
||||
|
||||
if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
|
||||
{
|
||||
bli_print_msg( " Invalid function parameters comatadd_() .", __FILE__, __LINE__ );
|
||||
@@ -344,8 +358,13 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
|
||||
|
||||
void zomatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda,const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *transa, *transb, *m, *n,
|
||||
(void*)alpha, *lda, (void*)beta, *ldb, *ldc );
|
||||
|
||||
if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
|
||||
{
|
||||
bli_print_msg( " Invalid function parameters zomatadd_() .", __FILE__, __LINE__ );
|
||||
|
||||
@@ -60,7 +60,14 @@ static dim_t bli_zoMatCopy_cc(dim_t rows, dim_t cols, const dcomplex alpha, cons
|
||||
|
||||
void somatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb)
|
||||
{
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *ldb );
|
||||
|
||||
if ( !(*trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -108,8 +115,13 @@ void somatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alp
|
||||
|
||||
void domatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *ldb );
|
||||
|
||||
if ( !(*trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -157,8 +169,13 @@ void domatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* al
|
||||
|
||||
void comatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *ldb );
|
||||
|
||||
if ( !(*trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -207,8 +224,13 @@ void comatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex*
|
||||
|
||||
void zomatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *ldb );
|
||||
|
||||
if ( !(*trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
|
||||
@@ -204,8 +204,13 @@ void somatcopy2_blis_impl
|
||||
f77_int* strideb
|
||||
)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *stridea, *ldb, *strideb );
|
||||
|
||||
if ( !(*trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -296,8 +301,13 @@ void domatcopy2_blis_impl
|
||||
f77_int* strideb
|
||||
)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *stridea, *ldb, *strideb );
|
||||
|
||||
if ( !( *trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -388,8 +398,13 @@ void comatcopy2_blis_impl
|
||||
f77_int* strideb
|
||||
)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *stridea, *ldb, *strideb );
|
||||
|
||||
if ( !( *trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
@@ -480,8 +495,13 @@ void zomatcopy2_blis_impl
|
||||
f77_int* strideb
|
||||
)
|
||||
{
|
||||
/* Initialize BLIS. */
|
||||
// Call to bli_init_auto() is not needed here
|
||||
AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
//bli_init_once();
|
||||
AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
|
||||
(void*)alpha, *lda, *stridea, *ldb, *strideb );
|
||||
|
||||
if ( !( *trans == 'n' || *trans == 'N' ||
|
||||
*trans == 't' || *trans == 'T' ||
|
||||
*trans == 'c' || *trans == 'C' ||
|
||||
|
||||
@@ -69,6 +69,9 @@ void PASTEF77S(ch,blasname) \
|
||||
\
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
AOCL_DTL_LOG_GEMM_BATCH_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *group_count); \
|
||||
\
|
||||
/* Perform BLAS parameter checking. */ \
|
||||
f77_int count; \
|
||||
@@ -133,6 +136,8 @@ void PASTEF77S(ch,blasname) \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
} \
|
||||
IF_BLIS_ENABLE_BLAS(\
|
||||
@@ -184,6 +189,9 @@ void PASTEF77S(ch,blasname) \
|
||||
\
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
AOCL_DTL_LOG_GEMM_BATCH_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *group_count); \
|
||||
\
|
||||
/* Perform BLAS parameter checking. */ \
|
||||
f77_int count; \
|
||||
@@ -266,6 +274,7 @@ void PASTEF77S(ch,blasname) \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
} \
|
||||
|
||||
Reference in New Issue
Block a user