Add AOCL DTL input/statistics logging for gemm3m, gemm_batch, i?amin and the
matrix copy/add extension APIs (?omatadd, ?omatcopy, ?omatcopy2, ?imatcopy).

What the patch does
  * aocl_dtl/aocldtl_blis.c, aocl_dtl/aocldtl_blis.h
      New helpers AOCL_DTL_log_{gemm3m,gemm_batch,amin,matadd,matcopy,
      matcopy2}_sizes and AOCL_DTL_log_gemm3m_stats, plus the matching
      AOCL_DTL_LOG_*_INPUTS / AOCL_DTL_LOG_GEMM3M_STATS macros. Each macro
      calls its helper only when gbIsLoggingEnabled is set, and has an empty
      definition in the "#else // AOCL_DTL_LOG_ENABLE" branch.
  * frame/compat/bla_amax.c, bla_amin.c
      Emit AOCL_DTL_TRACE_EXIT before bli_finalize_auto(); bla_amin.c also
      gains trace entry and input logging.
  * frame/compat/bla_gemm3m.c, extra/bla_gemm_batch.c, bla_imatcopy.c,
    bla_omatadd.c, bla_omatcopy.c, bla_omatcopy2.c
      Call the new logging macros at function entry (and the gemm3m stats
      macro on every exit path shown in the hunks).

Review changes made to the submitted patch
  * All new AOCL_DTL_log_* helpers format into "char buffer[256]". "%lf"
    prints the whole integer part of a double, so a large alpha/beta
    (for example 1e300) wrote past the end of the buffer with sprintf().
    They now call snprintf(buffer, sizeof(buffer), ...), which truncates
    the log message instead of overflowing.
  * bla_amin.c: bli_init_auto() now runs before the "vector empty" check,
    so the early "return 0" also calls bli_finalize_auto(), like every
    other exit path touched here. The two affected hunk headers were
    adjusted (+63,10 -> +63,12 and +90,7 -> +92,7).

Notes for whoever applies this
  * The patch was recovered from a copy whose newlines had been collapsed;
    indentation and blank context lines are reconstructed. Apply with
    "git apply --ignore-whitespace" or "patch -l".
  * The two bla_gemm.c hunks were whitespace-only re-indents; the original
    whitespace difference could not be recovered, so they are no-ops here.
  * The "index" lines still carry the blob ids of the submitted patch.
  * Not changed, worth a follow-up: the ?omatadd logging macro is invoked
    before the alpha/beta NULL checks (confirm DTL_get_complex_parts
    tolerates NULL), and the second gemm3m variant exits the trace with
    AOCL_DTL_LEVEL_INFO while it enters with AOCL_DTL_LEVEL_TRACE_1.

diff --git a/aocl_dtl/aocldtl_blis.c b/aocl_dtl/aocldtl_blis.c
index 78fbe289d..f099eafc8 100644
--- a/aocl_dtl/aocldtl_blis.c
+++ b/aocl_dtl/aocldtl_blis.c
@@ -478,6 +478,95 @@ void AOCL_DTL_log_trsm_stats(int8 loglevel,
 
 // Level-3 Extension Logging
 
+void AOCL_DTL_log_gemm3m_sizes(int8 loglevel,
+                               char dt_type,
+                               const f77_char transa,
+                               const f77_char transb,
+                               const f77_int m,
+                               const f77_int n,
+                               const f77_int k,
+                               const void *alpha,
+                               const f77_int lda,
+                               const f77_int ldb,
+                               const void *beta,
+                               const f77_int ldc,
+                               const char *filename,
+                               const char *function_name,
+                               int line)
+{
+    char buffer[256];
+
+    double alpha_real = 0.0;
+    double alpha_imag = 0.0;
+    double beta_real = 0.0;
+    double beta_imag = 0.0;
+
+    DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
+    DTL_get_complex_parts(dt_type, beta, &beta_real, &beta_imag);
+
+    // Ordering as per cblas/blas interfaces
+    // {S, D, C, Z} transa, transb, m, n, k, alpha_real, alpha_imag,
+    // lda, ldb, beta_real, beta_imag, ldc
+    snprintf(buffer, sizeof(buffer), "%c %c %c %ld %ld %ld %lf %lf %ld %ld %lf %lf %ld",
+            tolower(dt_type),
+            transa, transb,
+            (dim_t)m, (dim_t)n, (dim_t)k,
+            alpha_real, alpha_imag,
+            (inc_t)lda, (inc_t)ldb,
+            beta_real, beta_imag,
+            (inc_t)ldc);
+
+    AOCL_DTL_START_PERF_TIMER();
+    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
+}
+
+void AOCL_DTL_log_gemm3m_stats(int8 loglevel,
+                               char dt_type,
+                               const f77_int m,
+                               const f77_int n,
+                               const f77_int k)
+{
+    char buffer[256];
+
+    // Execution time is in micro seconds.
+    Double execution_time = AOCL_DTL_get_time_spent();
+
+    double flops = 2.0 * m * n * k;
+    if (dt_type == 'c' || dt_type == 'C' || dt_type == 'z' || dt_type == 'Z')
+    {
+        flops = 4.0 * flops;
+    }
+
+    if (execution_time != 0.0)
+        snprintf(buffer, sizeof(buffer), " nt=%ld %.3f ms %0.3f GFLOPS",
+                AOCL_get_requested_threads_count(),
+                execution_time/1000.0,
+                flops/(execution_time * 1e3));
+    else
+        snprintf(buffer, sizeof(buffer), " nt=%ld %.3f ms",
+                AOCL_get_requested_threads_count(),
+                execution_time/1000.0);
+
+    DTL_Trace(loglevel, TRACE_TYPE_RAW, NULL, NULL, 0, buffer);
+}
+
+void AOCL_DTL_log_gemm_batch_sizes(int8 loglevel,
+                                   char dt_type,
+                                   const f77_int group_count,
+                                   const char *filename,
+                                   const char *function_name,
+                                   int line)
+{
+    char buffer[256];
+
+    // Ordering as per cblas/blas interfaces
+    // {S, D, C, Z} identifier, group_count
+    snprintf(buffer, sizeof(buffer), "%c %ld\n", tolower(dt_type),
+            (dim_t)group_count);
+
+    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
+}
+
 void AOCL_DTL_log_gemm_get_size_sizes(int8 loglevel,
                                       char dt_type,
                                       const f77_char identifier,
@@ -1288,6 +1377,21 @@ void AOCL_DTL_log_rotmg_sizes(int8 loglevel,
 
 // Level-1 Logging
 
+void AOCL_DTL_log_amin_sizes(int8 loglevel,
+                             char dt_type,
+                             const f77_int n,
+                             const f77_int incx,
+                             const char *filename,
+                             const char *function_name,
+                             int line)
+{
+    char buffer[256];
+    // {S, D, C, Z} {n, incx}
+    snprintf(buffer, sizeof(buffer), "%c %ld %ld\n", tolower(dt_type), (dim_t)n, (dim_t)incx);
+
+    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
+}
+
 void AOCL_DTL_log_amax_sizes(int8 loglevel,
                              char dt_type,
                              const f77_int n,
@@ -1486,4 +1590,115 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
     DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
 }
 
+// Matrix Copy and Transpose Logging
+
+void AOCL_DTL_log_matadd_sizes(int8 loglevel,
+                               char dt_type,
+                               const f77_char transa,
+                               const f77_char transb,
+                               const f77_int m,
+                               const f77_int n,
+                               const void* alpha,
+                               const f77_int lda,
+                               const void* beta,
+                               const f77_int ldb,
+                               const f77_int ldc,
+                               const char* filename,
+                               const char* function_name,
+                               int line)
+{
+    char buffer[256];
+
+    double alpha_real = 0.0;
+    double alpha_imag = 0.0;
+    double beta_real = 0.0;
+    double beta_imag = 0.0;
+
+    DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
+    DTL_get_complex_parts(dt_type, beta, &beta_real, &beta_imag);
+
+    // Ordering as per cblas/blas interfaces
+    // {S, D, C, Z} transa, transb, m, n, alpha_real, alpha_imag,
+    // lda, beta_real, beta_imag, ldb, ldc
+    snprintf(buffer, sizeof(buffer), "%c %c %c %ld %ld %lf %lf %ld %lf %lf %ld %ld",
+            tolower(dt_type),
+            transa, transb,
+            (dim_t)m, (dim_t)n,
+            alpha_real, alpha_imag, (inc_t)lda,
+            beta_real, beta_imag, (inc_t)ldb,
+            (inc_t)ldc);
+
+    AOCL_DTL_START_PERF_TIMER();
+    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
+}
+
+
+
+void AOCL_DTL_log_matcopy_sizes(int8 loglevel,
+                                char dt_type,
+                                const f77_char trans,
+                                const f77_int rows,
+                                const f77_int cols,
+                                const void* alpha,
+                                const f77_int lda,
+                                const f77_int ldb,
+                                const char* filename,
+                                const char* function_name,
+                                int line)
+{
+    char buffer[256];
+
+    double alpha_real = 0.0;
+    double alpha_imag = 0.0;
+
+    DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
+
+    // Ordering as per cblas/blas interfaces
+    // {S, D, C, Z} trans, rows, cols, alpha_real, alpha_imag,
+    // lda, ldb
+    snprintf(buffer, sizeof(buffer), "%c %c %ld %ld %lf %lf %ld %ld",
+            tolower(dt_type), trans,
+            (dim_t)rows, (dim_t)cols,
+            alpha_real, alpha_imag,
+            (inc_t)lda, (inc_t)ldb);
+
+    AOCL_DTL_START_PERF_TIMER();
+    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
+}
+
+void AOCL_DTL_log_matcopy2_sizes(int8 loglevel,
+                                 char dt_type,
+                                 const f77_char trans,
+                                 const f77_int rows,
+                                 const f77_int cols,
+                                 const void* alpha,
+                                 const f77_int lda,
+                                 const f77_int stridea,
+                                 const f77_int ldb,
+                                 const f77_int strideb,
+                                 const char* filename,
+                                 const char* function_name,
+                                 int line)
+{
+    char buffer[256];
+
+    double alpha_real = 0.0;
+    double alpha_imag = 0.0;
+
+    DTL_get_complex_parts(dt_type, alpha, &alpha_real, &alpha_imag);
+
+    // Ordering as per cblas/blas interfaces
+    // {S, D, C, Z} trans, rows, cols, alpha_real, alpha_imag,
+    // lda, stridea, ldb, strideb
+    snprintf(buffer, sizeof(buffer), "%c %c %ld %ld %lf %lf %ld %ld %ld %ld",
+            tolower(dt_type), trans,
+            (dim_t)rows, (dim_t)cols,
+            alpha_real, alpha_imag,
+            (inc_t)lda, (inc_t)stridea,
+            (inc_t)ldb, (inc_t)strideb);
+
+    AOCL_DTL_START_PERF_TIMER();
+    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
+}
+
 #endif
diff --git a/aocl_dtl/aocldtl_blis.h b/aocl_dtl/aocldtl_blis.h
index c82accbac..65644f350 100644
--- a/aocl_dtl/aocldtl_blis.h
+++ b/aocl_dtl/aocldtl_blis.h
@@ -186,6 +186,35 @@ void AOCL_DTL_log_trsm_stats(int8 loglevel,
 
 // Level-3 Extension Logging
 
+void AOCL_DTL_log_gemm3m_sizes(int8 loglevel,
+                               char dt_type,
+                               const f77_char transa,
+                               const f77_char transb,
+                               const f77_int m,
+                               const f77_int n,
+                               const f77_int k,
+                               const void *alpha,
+                               const f77_int lda,
+                               const f77_int ldb,
+                               const void *beta,
+                               const f77_int ldc,
+                               const char *filename,
+                               const char *function_name,
+                               int line);
+
+void AOCL_DTL_log_gemm3m_stats(int8 loglevel,
+                               char dt_type,
+                               const f77_int m,
+                               const f77_int n,
+                               const f77_int k);
+
+void AOCL_DTL_log_gemm_batch_sizes(int8 loglevel,
+                                   char dt_type,
+                                   const f77_int group_count,
+                                   const char *filename,
+                                   const char *function_name,
+                                   int line);
+
 void AOCL_DTL_log_gemm_get_size_sizes(int8 loglevel,
                                       char dt_type,
                                       const f77_char identifer,
@@ -560,6 +589,14 @@ void AOCL_DTL_log_rotmg_sizes(int8 loglevel,
 
 // Level-1 Logging
 
+void AOCL_DTL_log_amin_sizes(int8 loglevel,
+                             char dt_type,
+                             const f77_int n,
+                             const f77_int incx,
+                             const char* filename,
+                             const char* function_name,
+                             int line);
+
 void AOCL_DTL_log_amax_sizes(int8 loglevel,
                              char dt_type,
                              const f77_int n,
@@ -648,6 +685,49 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
                              const char* function_name,
                              int line);
 
+// Matrix Copy and Transpose Logging
+
+void AOCL_DTL_log_matadd_sizes(int8 loglevel,
+                               char dt_type,
+                               const f77_char transa,
+                               const f77_char transb,
+                               const f77_int m,
+                               const f77_int n,
+                               const void* alpha,
+                               const f77_int lda,
+                               const void* beta,
+                               const f77_int ldb,
+                               const f77_int ldc,
+                               const char* filename,
+                               const char* function_name,
+                               int line);
+
+void AOCL_DTL_log_matcopy_sizes(int8 loglevel,
+                                char dt_type,
+                                const f77_char trans,
+                                const f77_int rows,
+                                const f77_int cols,
+                                const void* alpha,
+                                const f77_int lda,
+                                const f77_int ldb,
+                                const char* filename,
+                                const char* function_name,
+                                int line);
+
+void AOCL_DTL_log_matcopy2_sizes(int8 loglevel,
+                                 char dt_type,
+                                 const f77_char trans,
+                                 const f77_int rows,
+                                 const f77_int cols,
+                                 const void* alpha,
+                                 const f77_int lda,
+                                 const f77_int stridea,
+                                 const f77_int ldb,
+                                 const f77_int strideb,
+                                 const char* filename,
+                                 const char* function_name,
+                                 int line);
+
 // Level-3 Macros
 
 #define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc) \
@@ -714,6 +794,20 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
 
 // Level-3 Extension Macros
 
+#define AOCL_DTL_LOG_GEMM3M_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc) \
+    if (gbIsLoggingEnabled) \
+        AOCL_DTL_log_gemm3m_sizes(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc, \
+                                  __FILE__, __FUNCTION__, __LINE__);
+
+#define AOCL_DTL_LOG_GEMM3M_STATS(loglevel, dt_type, m, n, k) \
+    if (gbIsLoggingEnabled) \
+        AOCL_DTL_log_gemm3m_stats(loglevel, dt_type, m, n, k);
+
+#define AOCL_DTL_LOG_GEMM_BATCH_INPUTS(loglevel, dt, group_count) \
+    if (gbIsLoggingEnabled) \
+        AOCL_DTL_log_gemm_batch_sizes(loglevel, dt, group_count, \
+                                      __FILE__, __FUNCTION__, __LINE__);
+
 #define AOCL_DTL_LOG_GEMM_GET_SIZE_INPUTS(loglevel, dt, identifier, m, n, k) \
     if (gbIsLoggingEnabled) \
         AOCL_DTL_log_gemm_get_size_sizes(loglevel, dt, identifier, m, n, k, \
@@ -871,6 +965,10 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
 
 // Level-1 Macros
 
+#define AOCL_DTL_LOG_AMIN_INPUTS(loglevel, dt_type, n, incx) \
+    if (gbIsLoggingEnabled) \
+        AOCL_DTL_log_amin_sizes(loglevel, dt_type, n, incx, __FILE__, __FUNCTION__, __LINE__);
+
 #define AOCL_DTL_LOG_AMAX_INPUTS(loglevel, dt_type, n, incx) \
     if (gbIsLoggingEnabled) \
         AOCL_DTL_log_amax_sizes(loglevel, dt_type, n, incx, __FILE__, __FUNCTION__, __LINE__);
@@ -913,6 +1011,19 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
     if (gbIsLoggingEnabled) \
         AOCL_DTL_log_swap_sizes(loglevel, dt_type, n, incx, incy, __FILE__,__FUNCTION__,__LINE__);
 
+// Matrix Copy and Transpose Macros
+
+#define AOCL_DTL_LOG_MATADD_INPUTS(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc ) \
+    if (gbIsLoggingEnabled) \
+        AOCL_DTL_log_matadd_sizes(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc, __FILE__,__FUNCTION__,__LINE__);
+
+#define AOCL_DTL_LOG_MATCOPY_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb ) \
+    if (gbIsLoggingEnabled) \
+        AOCL_DTL_log_matcopy_sizes(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb, __FILE__,__FUNCTION__,__LINE__);
+
+#define AOCL_DTL_LOG_MATCOPY2_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb ) \
+    if (gbIsLoggingEnabled) \
+        AOCL_DTL_log_matcopy2_sizes(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb, __FILE__,__FUNCTION__,__LINE__);
 
 #else // AOCL_DTL_LOG_ENABLE
 
@@ -946,6 +1057,12 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
 
 // Level-3 Extension Macros
 
+#define AOCL_DTL_LOG_GEMM3M_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc)
+
+#define AOCL_DTL_LOG_GEMM3M_STATS(loglevel, dt_type, m, n, k)
+
+#define AOCL_DTL_LOG_GEMM_BATCH_INPUTS(loglevel, dt, group_count)
+
 #define AOCL_DTL_LOG_GEMM_GET_SIZE_INPUTS(loglevel, dt, identifier, m, n, k)
 
 #define AOCL_DTL_LOG_GEMM_PACK_INPUTS(loglevel, dt, identifier, trans, m, n, k, alpha, pld)
@@ -1016,6 +1133,8 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
 
 // Level-1 Macros
 
+#define AOCL_DTL_LOG_AMIN_INPUTS(loglevel, dt_type, n, incx)
+
 #define AOCL_DTL_LOG_AMAX_INPUTS(loglevel, dt_type, n, incx)
 
 #define AOCL_DTL_LOG_ASUM_INPUTS(loglevel, dt_type, n, incx)
@@ -1036,6 +1155,14 @@ void AOCL_DTL_log_swap_sizes(int8 loglevel,
 
 #define AOCL_DTL_LOG_SWAP_INPUTS(loglevel, dt_type, n, incx, incy)
 
+// Matrix Copy and Transpose Macros
+
+#define AOCL_DTL_LOG_MATADD_INPUTS(loglevel, dt_type, transa, transb, m, n, alpha, lda, beta, ldb, ldc )
+
+#define AOCL_DTL_LOG_MATCOPY_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, ldb )
+
+#define AOCL_DTL_LOG_MATCOPY2_INPUTS(loglevel, dt_type, trans, rows, cols, alpha, lda, stridea, ldb, strideb )
+
 #endif // AOCL_DTL_LOG_ENABLE
 
 #endif //ndef __AOCLDTL_BLIS_H
diff --git a/frame/compat/bla_amax.c b/frame/compat/bla_amax.c
index c7df995fb..2f1431af8 100644
--- a/frame/compat/bla_amax.c
+++ b/frame/compat/bla_amax.c
@@ -96,10 +96,10 @@ f77_int PASTEF772S(i,chx,blasname) \
        integer size, that typecast occurs here. */ \
     f77_index = bli_index + 1; \
 \
+    AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
     /* Finalize BLIS. */ \
     bli_finalize_auto(); \
 \
-    AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
     return f77_index; \
 }\
 \
diff --git a/frame/compat/bla_amin.c b/frame/compat/bla_amin.c
index 520b25c34..5ab891ac9 100644
--- a/frame/compat/bla_amin.c
+++ b/frame/compat/bla_amin.c
@@ -47,6 +47,12 @@ f77_int PASTEF772S(i,chx,blasname) \
        const ftype_x* x, const f77_int* incx \
      ) \
 { \
+    /* Initialize BLIS. */ \
+    bli_init_auto(); \
+\
+    AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
+    AOCL_DTL_LOG_AMIN_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(chx), *n, *incx) \
+\
     dim_t    n0; \
     ftype_x* x0; \
     inc_t    incx0; \
@@ -57,10 +63,12 @@ f77_int PASTEF772S(i,chx,blasname) \
        is needed to emulate netlib BLAS. Without it, bli_?aminv() will
        return 0, which ends up getting incremented to 1 (below) before
        being returned, which is not what we want. */ \
-    if ( *n < 1 || *incx <= 0 ) return 0; \
-\
-    /* Initialize BLIS. */ \
-    bli_init_auto(); \
+    if ( *n < 1 || *incx <= 0 ) { \
+        AOCL_DTL_TRACE_EXIT_ERR(AOCL_DTL_LEVEL_TRACE_1, "iamin_: vector empty") \
+        /* Finalize BLIS: bli_init_auto() has already run on this path. */ \
+        bli_finalize_auto(); \
+        return 0; \
+    }\
 \
     /* Convert/typecast negative values of n to zero. */ \
     bli_convert_blas_dim1( *n, n0 ); \
@@ -84,6 +92,7 @@ f77_int PASTEF772S(i,chx,blasname) \
        integer size, that typecast occurs here. */ \
     f77_index = bli_index + 1; \
 \
+    AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
     /* Finalize BLIS. */ \
     bli_finalize_auto(); \
 \
diff --git a/frame/compat/bla_gemm.c b/frame/compat/bla_gemm.c
index 55ef04c99..7405dbb2e 100644
--- a/frame/compat/bla_gemm.c
+++ b/frame/compat/bla_gemm.c
@@ -70,7 +70,7 @@ void PASTEF77S(ch,blasname) \
 \
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
     AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
-                             (void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
+                             (void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
 \
     /* Perform BLAS parameter checking. */ \
     PASTEBLACHK(blasname) \
@@ -214,7 +214,7 @@ void PASTEF77S(ch,blasname) \
 \
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
     AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
-                             (void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
+                             (void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
 \
     /* Perform BLAS parameter checking. */ \
     PASTEBLACHK(blasname) \
diff --git a/frame/compat/bla_gemm3m.c b/frame/compat/bla_gemm3m.c
index 70816fb82..e1a42ba4f 100644
--- a/frame/compat/bla_gemm3m.c
+++ b/frame/compat/bla_gemm3m.c
@@ -69,6 +69,8 @@ void PASTEF77S(ch,blasname) \
     bli_init_auto(); \
 \
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
+    AOCL_DTL_LOG_GEMM3M_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
+                               (void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
 \
     /* Perform BLAS parameter checking. */ \
     PASTEBLACHK(blasname) \
@@ -89,6 +91,7 @@ void PASTEF77S(ch,blasname) \
     if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
        && PASTEMAC(ch,eq1)( *beta ) )) \
     { \
+        AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
         AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
         /* Finalize BLIS. */ \
         bli_finalize_auto(); \
@@ -145,6 +148,7 @@ void PASTEF77S(ch,blasname) \
         ); \
     } \
 \
+    AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
     AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
     /* Finalize BLIS. */ \
     bli_finalize_auto(); \
@@ -195,6 +199,8 @@ void PASTEF77S(ch,blasname) \
     bli_init_auto(); \
 \
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
+    AOCL_DTL_LOG_GEMM3M_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
+                               (void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
 \
     /* Perform BLAS parameter checking. */ \
     PASTEBLACHK(blasname) \
@@ -215,6 +221,7 @@ void PASTEF77S(ch,blasname) \
     if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
        && PASTEMAC(ch,eq1)( *beta ) )) \
     { \
+        AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
         AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
         /* Finalize BLIS. */ \
         bli_finalize_auto(); \
@@ -293,6 +300,7 @@ void PASTEF77S(ch,blasname) \
     } \
 \
 \
+    AOCL_DTL_LOG_GEMM3M_STATS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, *k); \
     AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \
     /* Finalize BLIS. */ \
     bli_finalize_auto(); \
diff --git a/frame/compat/bla_imatcopy.c b/frame/compat/bla_imatcopy.c
index 231ab3a12..ce5ddaebe 100644
--- a/frame/compat/bla_imatcopy.c
+++ b/frame/compat/bla_imatcopy.c
@@ -165,9 +165,13 @@ void simatcopy_blis_impl
     f77_int* ldb
     )
 {
-    //printf("I am from simatcopy_\n");
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
+                                (void*)alpha, *lda, *ldb );
+
     if ( !( *trans == 'n' || *trans == 'N' ||
             *trans == 't' || *trans == 'T' ||
             *trans == 'c' || *trans == 'C' ||
@@ -251,8 +255,13 @@ void dimatcopy_blis_impl
     f77_int* ldb
     )
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
+                                (void*)alpha, *lda, *ldb );
+
     if ( !( *trans == 'n' || *trans == 'N' ||
             *trans == 't' || *trans == 'T' ||
             *trans == 'c' || *trans == 'C' ||
@@ -336,8 +345,13 @@ void cimatcopy_blis_impl
     f77_int* ldb
     )
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
+                                (void*)alpha, *lda, *ldb );
+
     if ( !( *trans == 'n' || *trans == 'N' ||
             *trans == 't' || *trans == 'T' ||
             *trans == 'c' || *trans == 'C' ||
@@ -421,8 +435,13 @@ void zimatcopy_blis_impl
     f77_int* ldb
     )
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
+                                (void*)alpha, *lda, *ldb );
+
     if ( !( *trans == 'n' || *trans == 'N' ||
             *trans == 't' || *trans == 'T' ||
             *trans == 'c' || *trans == 'C' ||
diff --git a/frame/compat/bla_omatadd.c b/frame/compat/bla_omatadd.c
index 70dc39cf3..d6cdd4eee 100644
--- a/frame/compat/bla_omatadd.c
+++ b/frame/compat/bla_omatadd.c
@@ -96,8 +96,12 @@ static void bli_zconjugate(dcomplex* A,dim_t cols,dim_t rows)
 
 void somatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *transa, *transb, *m, *n,
+                               (void*)alpha, *lda, (void*)beta, *ldb, *ldc );
 
     if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
     {
@@ -175,8 +179,13 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
 
 void domatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *transa, *transb, *m, *n,
+                               (void*)alpha, *lda, (void*)beta, *ldb, *ldc );
+
     if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
     {
         bli_print_msg( " Invalid function parameters domatadd_() .", __FILE__, __LINE__ );
@@ -253,8 +262,13 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
 
 void comatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *transa, *transb, *m, *n,
+                               (void*)alpha, *lda, (void*)beta, *ldb, *ldc );
+
     if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
     {
         bli_print_msg( " Invalid function parameters comatadd_() .", __FILE__, __LINE__ );
@@ -344,8 +358,13 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
 
 void zomatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda,const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc)
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATADD_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *transa, *transb, *m, *n,
+                               (void*)alpha, *lda, (void*)beta, *ldb, *ldc );
+
     if( alpha == NULL || A == NULL || beta == NULL || B == NULL || C == NULL || *lda < 1 || *ldb < 1 || *ldc < 1 || *m < 1 || *n < 1)
     {
         bli_print_msg( " Invalid function parameters zomatadd_() .", __FILE__, __LINE__ );
diff --git a/frame/compat/bla_omatcopy.c b/frame/compat/bla_omatcopy.c
index b66cc1958..3086b626b 100644
--- a/frame/compat/bla_omatcopy.c
+++ b/frame/compat/bla_omatcopy.c
@@ -60,7 +60,14 @@ static dim_t bli_zoMatCopy_cc(dim_t rows, dim_t cols, const dcomplex alpha, cons
 
 void somatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb)
 {
+
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
+    AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
+                                (void*)alpha, *lda, *ldb );
+
     if ( !(*trans == 'n' || *trans == 'N' ||
            *trans == 't' || *trans == 'T' ||
            *trans == 'c' || *trans == 'C' ||
@@ -108,8 +115,13 @@ void somatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alp
 
 void domatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
+                                (void*)alpha, *lda, *ldb );
+
     if ( !(*trans == 'n' || *trans == 'N' ||
            *trans == 't' || *trans == 'T' ||
            *trans == 'c' || *trans == 'C' ||
@@ -157,8 +169,13 @@ void domatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* al
 
 void comatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
+                                (void*)alpha, *lda, *ldb );
+
     if ( !(*trans == 'n' || *trans == 'N' ||
            *trans == 't' || *trans == 'T' ||
            *trans == 'c' || *trans == 'C' ||
@@ -207,8 +224,13 @@ void comatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex*
 
 void zomatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb)
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
+                                (void*)alpha, *lda, *ldb );
+
     if ( !(*trans == 'n' || *trans == 'N' ||
            *trans == 't' || *trans == 'T' ||
            *trans == 'c' || *trans == 'C' ||
diff --git a/frame/compat/bla_omatcopy2.c b/frame/compat/bla_omatcopy2.c
index 0e6694856..5d8719d1f 100644
--- a/frame/compat/bla_omatcopy2.c
+++ b/frame/compat/bla_omatcopy2.c
@@ -204,8 +204,13 @@ void somatcopy2_blis_impl
     f77_int* strideb
     )
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), *trans, *rows, *cols,
+                                 (void*)alpha, *lda, *stridea, *ldb, *strideb );
+
     if ( !(*trans == 'n' || *trans == 'N' ||
            *trans == 't' || *trans == 'T' ||
            *trans == 'c' || *trans == 'C' ||
@@ -296,8 +301,13 @@ void domatcopy2_blis_impl
     f77_int* strideb
     )
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *trans, *rows, *cols,
+                                 (void*)alpha, *lda, *stridea, *ldb, *strideb );
+
     if ( !( *trans == 'n' || *trans == 'N' ||
             *trans == 't' || *trans == 'T' ||
             *trans == 'c' || *trans == 'C' ||
@@ -388,8 +398,13 @@ void comatcopy2_blis_impl
     f77_int* strideb
     )
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(c), *trans, *rows, *cols,
+                                 (void*)alpha, *lda, *stridea, *ldb, *strideb );
+
     if ( !( *trans == 'n' || *trans == 'N' ||
             *trans == 't' || *trans == 'T' ||
             *trans == 'c' || *trans == 'C' ||
@@ -480,8 +495,13 @@ void zomatcopy2_blis_impl
     f77_int* strideb
     )
 {
+    /* Initialize BLIS. */
+    // Call to bli_init_auto() is not needed here
+    AOCL_DTL_INITIALIZE(AOCL_DTL_TRACE_LEVEL);
     AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-    //bli_init_once();
+    AOCL_DTL_LOG_MATCOPY2_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *trans, *rows, *cols,
+                                 (void*)alpha, *lda, *stridea, *ldb, *strideb );
+
     if ( !( *trans == 'n' || *trans == 'N' ||
             *trans == 't' || *trans == 'T' ||
             *trans == 'c' || *trans == 'C' ||
diff --git a/frame/compat/extra/bla_gemm_batch.c b/frame/compat/extra/bla_gemm_batch.c
index 978264972..0aba3c164 100644
--- a/frame/compat/extra/bla_gemm_batch.c
+++ b/frame/compat/extra/bla_gemm_batch.c
@@ -69,6 +69,9 @@ void PASTEF77S(ch,blasname) \
 \
     /* Initialize BLIS. */ \
     bli_init_auto(); \
+\
+    AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
+    AOCL_DTL_LOG_GEMM_BATCH_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *group_count); \
 \
     /* Perform BLAS parameter checking. */ \
     f77_int count; \
@@ -133,6 +136,8 @@ void PASTEF77S(ch,blasname) \
         } \
     } \
 \
+    AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
+    /* Finalize BLIS. */ \
     bli_finalize_auto(); \
 } \
 IF_BLIS_ENABLE_BLAS(\
@@ -184,6 +189,9 @@ void PASTEF77S(ch,blasname) \
 \
     /* Initialize BLIS. */ \
     bli_init_auto(); \
+\
+    AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
+    AOCL_DTL_LOG_GEMM_BATCH_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *group_count); \
 \
     /* Perform BLAS parameter checking. */ \
     f77_int count; \
@@ -266,6 +274,7 @@ void PASTEF77S(ch,blasname) \
         } \
     } \
 \
+    AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
     /* Finalize BLIS. */ \
     bli_finalize_auto(); \
 } \