mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
AOCL DTL - Added thread and execution time details in logs
-- Added number of threads used in DTL logs
-- Added support for timestamps in DTL traces
-- Added time taken by API at BLAS layer in the DTL logs
-- Added GFLOPS achieved in DTL logs
-- Added support to enable/disable execution time and
gflops printing for individual APIs. We may not want
it for all APIs. It will also help us migrate APIs
to execution time and gflops logs in stages.
-- Updated GEMM bench to match new logs
-- Refactored aocldtl_blis.c to remove code duplication.
-- Cleaned up log generation and reading to consistently
use spaces to separate the various fields.
-- Updated AOCL_gettid() to return correct thread id
when using pthreads.
AMD-Internal: [CPUPL-1691]
Change-Id: Iddb8a3be2a5cd624a07ccdbf5ae0695799d8ae8e
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
* These functions are invoked through macros by
|
||||
* end user.
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights Reserved.
|
||||
* Copyright (C) 2020-2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*=======================================================================*/
|
||||
#include "blis.h"
|
||||
@@ -23,10 +23,23 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Client should provide this function, it should return
|
||||
* number of threads used by the API
|
||||
*/
|
||||
extern dim_t AOCL_get_requested_threads_count(void);
|
||||
|
||||
/* By default the trace level will be set to ALL User can configure this
|
||||
parameter at run time using command line argument */
|
||||
uint32 gui32TraceLogLevel = AOCL_DTL_TRACE_LEVEL;
|
||||
|
||||
/*
|
||||
* Time elapsed in the function will be logged from main thread only,
|
||||
* we will save the main thread id. This will be compared with the id
|
||||
* of the logging thread.
|
||||
*/
|
||||
AOCL_TID gtidMainThreadID = -1;
|
||||
|
||||
/* The user can configure the file name in which he wants to dump the data */
|
||||
#if AOCL_DTL_TRACE_ENABLE
|
||||
/* The file name for storing traced log added manually in the code */
|
||||
@@ -117,6 +130,9 @@ void DTL_Initialize(
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Save Id for main thread */
|
||||
gtidMainThreadID = AOCL_gettid();
|
||||
|
||||
} /* DTL_Initialize */
|
||||
#endif
|
||||
|
||||
@@ -162,6 +178,7 @@ void DTL_Uninitialize(void)
|
||||
* pi8FunctionName - Function Name
|
||||
* ui32LineNumber - Line number
|
||||
* pi8Message - Message to be printed
|
||||
*
|
||||
* Output Parameter(s) : None
|
||||
* Return parameter(s) : None
|
||||
*==================================================================*/
|
||||
@@ -176,6 +193,8 @@ void DTL_Trace(
|
||||
{
|
||||
uint8 i = 0;
|
||||
AOCL_FAL_FILE *pOutFile = NULL;
|
||||
uint64 u64EventTime = AOCL_getTimestamp();
|
||||
dim_t u64RequestedThreadsCount = AOCL_get_requested_threads_count();
|
||||
|
||||
bli_init_auto();
|
||||
|
||||
@@ -226,7 +245,6 @@ void DTL_Trace(
|
||||
level set while initialization */
|
||||
if (ui8LogLevel <= gui32TraceLogLevel)
|
||||
{
|
||||
|
||||
/* Indent as per level if is function call trace */
|
||||
if ((ui8LogLevel >= AOCL_DTL_LEVEL_TRACE_1) &&
|
||||
(ui8LogLevel <= AOCL_DTL_LEVEL_TRACE_8))
|
||||
@@ -242,26 +260,39 @@ void DTL_Trace(
|
||||
switch (ui8LogType)
|
||||
{
|
||||
case TRACE_TYPE_FENTRY:
|
||||
fprintf(pOutFile, "In %s()...\n", pi8FunctionName);
|
||||
fprintf(pOutFile, "nt=%ld,ts=%ld: In %s()...\n",
|
||||
u64RequestedThreadsCount,
|
||||
u64EventTime,
|
||||
pi8FunctionName);
|
||||
break;
|
||||
|
||||
case TRACE_TYPE_FEXIT:
|
||||
if (pi8Message == NULL)
|
||||
{ /* Function returned successfully */
|
||||
fprintf(pOutFile, "Out of %s()\n", pi8FunctionName);
|
||||
fprintf(pOutFile, "ts=%ld: Out of %s()\n",
|
||||
u64EventTime,
|
||||
pi8FunctionName);
|
||||
}
|
||||
else
|
||||
{ /* Function failed to complete, use message to get error */
|
||||
fprintf(pOutFile, "Out of %s() with error %s\n", pi8FunctionName, pi8Message);
|
||||
fprintf(pOutFile, "ts=%ld: Out of %s() with error %s\n",
|
||||
u64EventTime,
|
||||
pi8FunctionName,
|
||||
pi8Message);
|
||||
}
|
||||
break;
|
||||
|
||||
case TRACE_TYPE_LOG:
|
||||
fprintf(pOutFile, "%s:%d:%s\n", pi8FileName, ui32LineNumber, pi8Message);
|
||||
fprintf(pOutFile, "%s %s",
|
||||
pi8FileName,
|
||||
pi8Message
|
||||
);
|
||||
|
||||
break;
|
||||
|
||||
case TRACE_TYPE_RAW:
|
||||
fprintf(pOutFile, "%s\n", pi8Message);
|
||||
fprintf(pOutFile, "%s\n",
|
||||
pi8Message);
|
||||
break;
|
||||
}
|
||||
fflush(pOutFile);
|
||||
@@ -407,6 +438,72 @@ void DTL_DumpData(
|
||||
} /* DTL_DumpData */
|
||||
#endif
|
||||
|
||||
#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE)
|
||||
void AOCL_DTL_start_perf_timer(void)
|
||||
{
|
||||
AOCL_TID current_thread = AOCL_gettid();
|
||||
|
||||
// Automatic duration calulation is currently
|
||||
// supported from main thread only, in other words
|
||||
// at BLAS interface.
|
||||
if (current_thread != gtidMainThreadID) {
|
||||
return;
|
||||
}
|
||||
|
||||
AOCL_FLIST_Node *pFileNode = AOCL_FLIST_GetNode(gpLogFileList, current_thread);
|
||||
|
||||
if (NULL == pFileNode) {
|
||||
/* It might be the first call from the current thread, try to create
|
||||
new trace for this thread. */
|
||||
AOCL_FAL_FILE *pOutFile = AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, current_thread);
|
||||
|
||||
if (NULL == pOutFile)
|
||||
{
|
||||
AOCL_DEBUGPRINT("File does not exists to dump the trace data \n");
|
||||
return;
|
||||
} else {
|
||||
pFileNode = AOCL_FLIST_GetNode(gpLogFileList, current_thread);
|
||||
}
|
||||
}
|
||||
|
||||
pFileNode->u64SavedTimeStamp = AOCL_getTimestamp();
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
|
||||
uint64 AOCL_DTL_get_time_spent(void)
|
||||
{
|
||||
AOCL_TID current_thread = AOCL_gettid();
|
||||
|
||||
// Automatic duration calulation is currently
|
||||
// supported from main thread only, in other words
|
||||
// at BLAS interface.
|
||||
if (current_thread != gtidMainThreadID) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64 u64CurrentTimeStamp = AOCL_getTimestamp();
|
||||
AOCL_FLIST_Node *pFileNode = AOCL_FLIST_GetNode(gpLogFileList, AOCL_gettid());
|
||||
|
||||
if (NULL == pFileNode) {
|
||||
/* It might be the first call from the current thread, try to create
|
||||
new trace for this thread. */
|
||||
AOCL_FAL_FILE *pOutFile = AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid());
|
||||
|
||||
if (NULL == pOutFile)
|
||||
{
|
||||
AOCL_DEBUGPRINT("File does not exists to dump the trace data \n");
|
||||
return 0;
|
||||
} else {
|
||||
pFileNode = AOCL_FLIST_GetNode(gpLogFileList, AOCL_gettid());
|
||||
}
|
||||
}
|
||||
|
||||
return (u64CurrentTimeStamp - pFileNode->u64SavedTimeStamp);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* This is enabled by passing ETRACE_ENABLE=1 to make */
|
||||
#ifdef AOCL_DTL_AUTO_TRACE_ENABLE
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
/*===================================================================
|
||||
* File Name : aocldtl.h
|
||||
*
|
||||
*
|
||||
* Description : This is main interface file for the end user
|
||||
* It provides defination for all macros to be
|
||||
* It provides defination for all macros to be
|
||||
* used by user to add debug/trace information.
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc
|
||||
*
|
||||
* Copyright (C) 2020-2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*==================================================================*/
|
||||
|
||||
#ifndef _AOCLDTL_H_
|
||||
@@ -47,7 +47,7 @@
|
||||
#endif
|
||||
|
||||
#if AOCL_DTL_TRACE_ENABLE
|
||||
/* Exit macro to trace the flow of control The parameter LogLevel specifies
|
||||
/* Exit macro to trace the flow of control The parameter LogLevel specifies
|
||||
log level String will preferably contains the function name in which this
|
||||
macro is invoked */
|
||||
#define AOCL_DTL_TRACE_EXIT(LogLevel) \
|
||||
@@ -72,8 +72,8 @@
|
||||
#endif
|
||||
|
||||
#if AOCL_DTL_DUMP_ENABLE
|
||||
/* Macro to Dump the DATA The parameters Buffer contains the data to be
|
||||
dumped BufferSize specifies the no. of bytes to be dumped DataType
|
||||
/* Macro to Dump the DATA The parameters Buffer contains the data to be
|
||||
dumped BufferSize specifies the no. of bytes to be dumped DataType
|
||||
specifies the data type of Buffer */
|
||||
#define AOCL_DTL_DUMP(LogLevel, Buffer, BufferSize, DataType, String, OutputType) \
|
||||
/* Call the Dump function to Dump the DATA */ \
|
||||
@@ -103,6 +103,19 @@
|
||||
#define AOCL_DTL_LOG(LogLevel, Message)
|
||||
#endif
|
||||
|
||||
#if AOCL_DTL_LOG_ENABLE
|
||||
|
||||
void AOCL_DTL_start_perf_timer(void);
|
||||
uint64 AOCL_DTL_get_time_spent(void);
|
||||
|
||||
/* Macro to log the Data */
|
||||
#define AOCL_DTL_START_PERF_TIMER() \
|
||||
AOCL_DTL_start_perf_timer()
|
||||
#else
|
||||
/* Dummy macro definition if the AOCL_DTL_LOG_ENABLE macro is not enabled */
|
||||
#define AOCL_DTL_START_PERF_TIMER()
|
||||
#endif
|
||||
|
||||
/* Macro to initialize the prerequisites for debugging */
|
||||
#ifdef AOCL_DTL_INITIALIZE_ENABLE
|
||||
#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) \
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -14,22 +14,29 @@
|
||||
#include "blis.h"
|
||||
|
||||
#if AOCL_DTL_LOG_ENABLE
|
||||
dim_t AOCL_get_requested_threads_count(void);
|
||||
|
||||
void AOCL_DTL_log_gemm_sizes(int8 loglevel,
|
||||
char dt,
|
||||
char dt_type,
|
||||
const f77_char transa,
|
||||
const f77_char transb,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const f77_int k,
|
||||
const void* alpha,
|
||||
const void *alpha,
|
||||
const f77_int lda,
|
||||
const f77_int ldb,
|
||||
const void* beta,
|
||||
const void *beta,
|
||||
const f77_int ldc,
|
||||
const char* filename,
|
||||
const char* functionn_name,
|
||||
const char *filename,
|
||||
const char *function_name,
|
||||
int line);
|
||||
|
||||
void AOCL_DTL_log_gemm_stats(int8 loglevel,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const f77_int k);
|
||||
|
||||
void AOCL_DTL_log_trsm_sizes(int8 loglevel,
|
||||
char dt,
|
||||
f77_char side,
|
||||
@@ -376,9 +383,13 @@ void AOCL_DTL_log_trmm_sizes(int8 loglevel,
|
||||
const char* function_name,
|
||||
int line);
|
||||
|
||||
|
||||
#define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc) \
|
||||
AOCL_DTL_log_gemm_sizes(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc, __FILE__, __FUNCTION__, __LINE__);
|
||||
|
||||
#define AOCL_DTL_LOG_GEMM_STATS(loglevel, m, n, k) \
|
||||
AOCL_DTL_log_gemm_stats(loglevel, m, n, k);
|
||||
|
||||
#define AOCL_DTL_LOG_TRSM_INPUTS(loglevel, dt, side, uploa, transa, diaga, m, n, alpha, lda, ldb) \
|
||||
AOCL_DTL_log_trsm_sizes(loglevel, dt, side, uploa, transa, diaga, m, n, alpha, lda, ldb, __FILE__, __FUNCTION__, __LINE__);
|
||||
|
||||
@@ -487,6 +498,8 @@ void AOCL_DTL_log_trmm_sizes(int8 loglevel,
|
||||
|
||||
#define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, dt, transa, transb, m, n, k, alpha, lda, ldb, beta, ldc)
|
||||
|
||||
#define AOCL_DTL_LOG_GEMM_STATS(loglevel, m, n, k)
|
||||
|
||||
#define AOCL_DTL_LOG_TRSM_INPUTS(loglevel, dt, side, uploa, transa, diaga, m, n, alpha, lda, ldb)
|
||||
|
||||
#define AOCL_DTL_LOG_GEMMT_INPUTS(loglevel, dt, uplo, transa, transb, n, k, alpha, lda, ldb, beta, ldc)
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
/*===================================================================
|
||||
* File Name : aoclflist.c
|
||||
*
|
||||
* Description : Linked list of open files assocaited with
|
||||
*
|
||||
* Description : Linked list of open files assocaited with
|
||||
* each thread. This is used to log the data
|
||||
* to correct file as per the current thread id.
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc
|
||||
*
|
||||
*
|
||||
*==================================================================*/
|
||||
|
||||
#include "aocltpdef.h"
|
||||
@@ -16,7 +16,7 @@
|
||||
#include "aoclos.h"
|
||||
|
||||
|
||||
/* Disable instrumentation for following function, since they are called from
|
||||
/* Disable instrumentation for following function, since they are called from
|
||||
* Auto Generated execution trace handlers. */
|
||||
Bool AOCL_FLIST_IsEmpty(
|
||||
AOCL_FLIST_Node *plist) __attribute__((no_instrument_function));
|
||||
@@ -45,6 +45,35 @@ Bool AOCL_FLIST_IsEmpty(AOCL_FLIST_Node *plist)
|
||||
|
||||
} /* AOCL_FLIST_IsEmpty */
|
||||
|
||||
/*
 * Looks up the list node that belongs to thread `tid`.
 *
 * plist - head of the per-thread file list (may be empty)
 * tid   - thread id to search for
 *
 * Returns the first node whose tid matches, or NULL when the list is
 * empty or holds no node for this thread. A matching node is returned
 * even if its file pointer is NULL; that case only emits a debug print.
 */
AOCL_FLIST_Node * AOCL_FLIST_GetNode(AOCL_FLIST_Node *plist, AOCL_TID tid)
{
    AOCL_FLIST_Node *cursor;

    /* Nothing to search in an empty list. */
    if (AOCL_FLIST_IsEmpty(plist) == 1)
    {
        return NULL;
    }

    /* Walk the list until the node owned by `tid` is found. */
    for (cursor = plist; cursor != NULL; cursor = cursor->pNext)
    {
        if (cursor->tid != tid)
        {
            continue;
        }

        if (cursor->fp == NULL)
        {
            AOCL_DEBUGPRINT("Could not get saved time stamp for thread = %d", tid);
        }

        return cursor;
    }

    return NULL;

} /* AOCL_FLIST_GetNode */
|
||||
|
||||
AOCL_FAL_FILE *AOCL_FLIST_GetFile(AOCL_FLIST_Node *plist, AOCL_TID tid)
|
||||
{
|
||||
AOCL_FLIST_Node *temp;
|
||||
@@ -89,7 +118,7 @@ AOCL_FAL_FILE *AOCL_FLIST_AddFile(const int8 *pchFilePrefix, AOCL_FLIST_Node **p
|
||||
}
|
||||
|
||||
/* We don't have an existing file, let's try to open a new one */
|
||||
sprintf(pchFileName, "P%d_T%d_%s", AOCL_getpid(), tid, pchFilePrefix);
|
||||
sprintf(pchFileName, "P%d_T%u_%s", AOCL_getpid(), tid, pchFilePrefix);
|
||||
|
||||
file = AOCL_FAL_Open(pchFileName, "wb");
|
||||
if (file == NULL)
|
||||
@@ -108,6 +137,7 @@ AOCL_FAL_FILE *AOCL_FLIST_AddFile(const int8 *pchFilePrefix, AOCL_FLIST_Node **p
|
||||
|
||||
newNode->pNext = NULL;
|
||||
newNode->tid = tid;
|
||||
newNode->u64SavedTimeStamp = AOCL_getTimestamp();
|
||||
newNode->fp = file;
|
||||
|
||||
if (AOCL_FLIST_IsEmpty(*plist) == 1)
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
/*===================================================================
|
||||
* File Name : aoclflist.h
|
||||
*
|
||||
* Description : Linked list of open files assocaited with
|
||||
*
|
||||
* Description : Linked list of open files assocaited with
|
||||
* each thread. This is used to log the data
|
||||
* to correct file as per the current thread id.
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc
|
||||
*
|
||||
*
|
||||
*==================================================================*/
|
||||
|
||||
#ifndef _AOCL_FLIST_H_
|
||||
@@ -19,12 +19,17 @@ typedef struct AOCL_FLIST_Node_t
|
||||
{
|
||||
AOCL_TID tid;
|
||||
AOCL_FAL_FILE *fp;
|
||||
uint64 u64SavedTimeStamp;
|
||||
struct AOCL_FLIST_Node_t *pNext;
|
||||
} AOCL_FLIST_Node;
|
||||
|
||||
Bool AOCL_FLIST_IsEmpty(
|
||||
AOCL_FLIST_Node *plist);
|
||||
|
||||
AOCL_FLIST_Node * AOCL_FLIST_GetNode(
|
||||
AOCL_FLIST_Node *plist,
|
||||
AOCL_TID tid);
|
||||
|
||||
AOCL_FAL_FILE *AOCL_FLIST_GetFile(
|
||||
AOCL_FLIST_Node *plist,
|
||||
AOCL_TID tid);
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
// BLIS TODO: This is workaround to check if BLIS is built with
|
||||
// BLIS TODO: This is workaround to check if BLIS is built with
|
||||
// openmp support. Ideally we don't want any library
|
||||
// specific code in dtl.
|
||||
#include <blis.h>
|
||||
@@ -36,19 +36,23 @@
|
||||
|
||||
*/
|
||||
|
||||
uint32 AOCL_gettid(void) __attribute__((no_instrument_function));
|
||||
AOCL_TID AOCL_gettid(void) __attribute__((no_instrument_function));
|
||||
pid_t AOCL_getpid(void) __attribute__((no_instrument_function));
|
||||
uint64 AOCL_getTimestamp(void) __attribute__((no_instrument_function));
|
||||
|
||||
uint32 AOCL_gettid(void)
|
||||
AOCL_TID AOCL_gettid(void)
|
||||
{
|
||||
|
||||
#ifdef BLIS_ENABLE_OPENMP
|
||||
return omp_get_thread_num();
|
||||
#else
|
||||
return 0; // will not work for pthread-based parallelization
|
||||
|
||||
#ifdef BLIS_ENABLE_PTHREADS
|
||||
return pthread_self();
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
pid_t AOCL_getpid(void)
|
||||
@@ -63,7 +67,7 @@ uint64 AOCL_getTimestamp(void)
|
||||
/* The C11 way */
|
||||
if (clock_gettime(CLOCK_REALTIME, &tms))
|
||||
{
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* seconds, multiplied with 1 million */
|
||||
@@ -73,13 +77,13 @@ uint64 AOCL_getTimestamp(void)
|
||||
/* round up if necessary */
|
||||
if (tms.tv_nsec % 1000 >= 500)
|
||||
{
|
||||
++micros;
|
||||
++micros;
|
||||
}
|
||||
return micros;
|
||||
}
|
||||
|
||||
#else /* Non linux support */
|
||||
uint32 AOCL_gettid(void)
|
||||
AOCL_TID AOCL_gettid(void)
|
||||
{
|
||||
/* stub for other os's */
|
||||
return 0;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
|
||||
/*===================================================================
|
||||
* File Name : aocltpdef.h
|
||||
*
|
||||
*
|
||||
* Description : Abstraction for various datatypes used by DTL.
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc
|
||||
*
|
||||
* Copyright (C) 2020-2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*==================================================================*/
|
||||
#ifndef AOCL_TYPEDEF_H_
|
||||
#define AOCL_TYPEDEF_H_
|
||||
|
||||
@@ -57,15 +57,15 @@
|
||||
|
||||
|
||||
#define AOCL_MATRIX_INITIALISATION
|
||||
|
||||
#define BUFFER_SIZE 256
|
||||
|
||||
/* For BLIS since logs are collected at BLAS interfaces
|
||||
* we disable cblas interfaces for this benchmark application
|
||||
*/
|
||||
|
||||
#ifdef BLIS_ENABLE_CBLAS
|
||||
//#define CBLAS
|
||||
#endif
|
||||
#ifdef BLIS_ENABLE_CBLAS
|
||||
//#define CBLAS
|
||||
#endif
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
@@ -110,26 +110,36 @@ int main( int argc, char** argv )
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fprintf(fout, "Dt m\t n\t k\t lda\t ldb\t ldc\t rs_a rs_b rs_c transa transb \
|
||||
alphaR\t alphaI\t betaR\t betaI\t gflops\n");
|
||||
fprintf(fout, "Dt transa transb m n k alphaR alphaI lda ldb betaR betaI ldc gflops\n");
|
||||
|
||||
// Following variables are needed for scanf to read inputs properly
|
||||
// however they are not used in bench.
|
||||
char api_name[BUFFER_SIZE]; // to store function name, line no present in logs
|
||||
char dummy_buffer[BUFFER_SIZE];
|
||||
|
||||
// Variables extracted from the logs which are used by bench
|
||||
char stor_scheme, transA_c, transB_c;
|
||||
double alpha_r, beta_r, alpha_i, beta_i;
|
||||
dim_t m_trans, n_trans;
|
||||
char tmp[256]; // to store function name, line no present in logs.
|
||||
dim_t rs_a, rs_b, rs_c;
|
||||
dim_t cs_a, cs_b, cs_c;
|
||||
inc_t lda, ldb, ldc;
|
||||
|
||||
stor_scheme = 'C'; // since logs are collected at BLAS APIs
|
||||
stor_scheme = 'C'; // By default set it to Column Major
|
||||
|
||||
while (fscanf(fin, "%s %c %ld %ld %ld %ld %ld %ld %ld %ld %ld %c %c %lf %lf %lf %lf\n",
|
||||
tmp, &dt_ch, &m, &n, &k, &cs_a, &cs_b, &cs_c, &rs_a, &rs_b, &rs_c,
|
||||
&transA_c, &transB_c, &alpha_r, &alpha_i, &beta_r, &beta_i) == 17)
|
||||
//{S, D, C, Z} transa, transb, m, n, k, alpha_real, alpha_imag, lda ldb
|
||||
// beta_real, beta_imag, ldc,
|
||||
//
|
||||
// number of threads, execution time, gflops ---> ignored by bench
|
||||
|
||||
while (fscanf(fin, "%s %c %c %c %ld %ld %ld %lf %lf %ld %ld %lf %lf %ld[^\n]",
|
||||
api_name, &dt_ch, &transA_c, &transB_c, &m, &n, &k, &alpha_r, &alpha_i,
|
||||
&lda, &ldb, &beta_r, &beta_i, &ldc) == 14)
|
||||
{
|
||||
|
||||
if(cs_a==1 && cs_b==1 && cs_c==1) stor_scheme = 'R';
|
||||
if(rs_a==1 && rs_b==1 && rs_c==1) stor_scheme = 'C';
|
||||
// Discard any extra data on current line in the input file.
|
||||
fgets(dummy_buffer, BUFFER_SIZE, fin );
|
||||
|
||||
// At BLAS level only column major order is supported.
|
||||
stor_scheme = 'C';
|
||||
|
||||
if (dt_ch == 'D' || dt_ch == 'd') dt = BLIS_DOUBLE;
|
||||
else if (dt_ch == 'Z' || dt_ch == 'z') dt = BLIS_DCOMPLEX;
|
||||
@@ -164,10 +174,7 @@ int main( int argc, char** argv )
|
||||
|
||||
if( (stor_scheme == 'C') || (stor_scheme == 'c') )
|
||||
{
|
||||
// Column storage
|
||||
lda = cs_a; ldb = cs_b; ldc = cs_c;
|
||||
|
||||
// leading dimension should be greater than number of rows
|
||||
// leading dimension should be greater than number of rows
|
||||
// if ((m > lda) || (k > ldb) || (m > ldc)) continue;
|
||||
// Since this bench app is run on logs generated by AOCL trace logs
|
||||
// - we have relaxed the checks on the input parameters.
|
||||
@@ -190,14 +197,12 @@ int main( int argc, char** argv )
|
||||
}
|
||||
else if( (stor_scheme == 'r') || (stor_scheme == 'R') )
|
||||
{
|
||||
// Row-major order
|
||||
lda = rs_a; ldb = rs_b; ldc = rs_c;
|
||||
//leading dimension should be greater than number of columns
|
||||
//if ((k > lda) || (n > ldb) || (n > ldc)) continue;
|
||||
// Since this bench app is run on logs generated by AOCL trace logs
|
||||
// - we have relaxed the checks on the input parameters.
|
||||
|
||||
// if A is transpose - A(k x lda), lda >= max(1,m)
|
||||
// if A is transpose - A(k x lda), lda >= max(1,m)
|
||||
// if A is non-transpose - A (m x lda), lda >= max(1,k)
|
||||
// if B is transpose - B (n x ldb), ldb >= max(1,k)
|
||||
// if B is non-transpose - B (k x ldb ), ldb >= max(1,n)
|
||||
@@ -228,7 +233,7 @@ int main( int argc, char** argv )
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef AOCL_MATRIX_INITIALISATION
|
||||
bli_randm( &a );
|
||||
bli_randm( &b );
|
||||
@@ -474,9 +479,8 @@ int main( int argc, char** argv )
|
||||
(unsigned long)n,
|
||||
(unsigned long)k, gflops);
|
||||
|
||||
fprintf (fout, "%c %ld\t %ld\t %ld\t %ld\t %ld\t %ld\t %ld %ld %ld %c %c %lf\t %lf\t %lf\t %lf\t %6.3f\n", \
|
||||
dt_ch, m, n, k, lda, ldb, ldc, rs_a, rs_b, rs_c, \
|
||||
transA_c, transB_c, alpha_r, alpha_i, beta_r, beta_i, gflops);
|
||||
fprintf (fout, "%c %c %c %ld %ld %ld %lf %lf %ld %ld %lf %lf %ld %6.3f\n", \
|
||||
dt_ch, transA_c, transB_c, m, n, k, alpha_r, alpha_i, lda, ldb, beta_r, beta_i, ldc, gflops);
|
||||
|
||||
fflush(fout);
|
||||
|
||||
|
||||
@@ -1,18 +1,32 @@
|
||||
bli_gemm_ex:125: D 173 23 1 173 174 174 1 1 1 t n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 173 23 1 1 1 1 1 23 23 t n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 173 23 1 1 1 1 1 23 23 n t -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 83 23 1 83 84 84 1 1 1 n n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 41 2 1 41 42 42 1 1 1 n n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 77 8 1 77 78 78 1 1 1 n t -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 77 8 1 77 78 78 1 1 1 n n -2.000000 0.000000 3.000000 0.000000
|
||||
bli_gemm_ex:125: D 41 5 1 41 42 42 1 1 1 n n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 41 5 1 41 42 42 1 1 1 t n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 65 8 1 65 66 66 1 1 1 n n -3.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 53 8 1 53 54 54 1 1 1 n n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 68 8 1 68 69 69 1 1 1 n n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 41 5 1 41 42 42 1 1 1 n t -1.000000 0.000000 2.000000 0.000000
|
||||
bli_gemm_ex:125: D 41 5 1 41 42 42 1 1 1 n n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 53 5 1 53 54 54 1 1 1 n n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 95 14 1 95 96 96 1 1 1 t n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 110 17 1 1 1 1 1 17 17 n n -1.000000 0.000000 1.000000 0.000000
|
||||
bli_gemm_ex:125: D 95 14 1 95 96 96 1 1 1 n n -1.000000 0.000000 1.000000 0.000000
|
||||
dgemm_ D N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000 nt=4 1542.854 ms 7.778 GFLOPS
|
||||
dgemm_ D N N 100 100 100 0.900000 0.000000 104 104 -1.100000 0.000000 104 nt=4 0.307 ms 6.515 GFLOPS
|
||||
dgemm_ D N N 500 500 500 0.900000 0.000000 504 504 -1.100000 0.000000 504 nt=4 32.442 ms 7.706 GFLOPS
|
||||
dgemm_ D N N 900 900 900 0.900000 0.000000 904 904 -1.100000 0.000000 904 nt=4 172.170 ms 8.468 GFLOPS
|
||||
dgemm_ D N N 1300 1300 1300 0.900000 0.000000 1304 1304 -1.100000 0.000000 1304 nt=4 655.381 ms 6.704 GFLOPS
|
||||
dgemm_ D N T 1700 1700 1700 0.900000 0.000000 1704 1704 -1.100000 0.000000 1704 nt=4 1302.928 ms 7.541 GFLOPS
|
||||
dgemm_ D T N 2100 2100 2100 0.900000 0.000000 2104 2104 -1.100000 0.000000 2104 nt=4 3278.541 ms 5.649 GFLOPS
|
||||
dgemm_ D T T 2500 2500 2500 0.900000 0.000000 2504 2504 -1.100000 0.000000 2504 nt=4 5292.842 ms 5.904 GFLOPS
|
||||
zgemm_ Z N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000 nt=4 300.940 ms 159.500 GFLOPS
|
||||
zgemm_ Z N N 100 100 100 0.900000 0.000000 104 104 -1.100000 0.000000 104 nt=4 0.748 ms 10.695 GFLOPS
|
||||
zgemm_ Z N N 500 500 500 0.900000 0.000000 504 504 -1.100000 0.000000 504 nt=4 8.618 ms 116.036 GFLOPS
|
||||
zgemm_ Z N N 900 900 900 0.900000 0.000000 904 904 -1.100000 0.000000 904 nt=4 42.717 ms 136.526 GFLOPS
|
||||
zgemm_ Z N N 1300 1300 1300 0.900000 0.000000 1304 1304 -1.100000 0.000000 1304 nt=4 124.652 ms 141.001 GFLOPS
|
||||
zgemm_ Z N T 1700 1700 1700 0.900000 0.000000 1704 1704 -1.100000 0.000000 1704 nt=4 277.029 ms 141.877 GFLOPS
|
||||
zgemm_ Z T N 2100 2100 2100 0.900000 0.000000 2104 2104 -1.100000 0.000000 2104 nt=4 494.360 ms 149.866 GFLOPS
|
||||
zgemm_ Z T T 2500 2500 2500 0.900000 0.000000 2504 2504 -1.100000 0.000000 2504 nt=4 803.699 ms 155.531 GFLOPS
|
||||
cgemm_ C N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000 nt=4 135.321 ms 354.712 GFLOPS
|
||||
cgemm_ C N N 100 100 100 0.900000 0.000000 104 104 -1.100000 0.000000 104 nt=4 0.429 ms 18.648 GFLOPS
|
||||
cgemm_ C N N 500 500 500 0.900000 0.000000 504 504 -1.100000 0.000000 504 nt=4 5.045 ms 198.216 GFLOPS
|
||||
cgemm_ C N N 900 900 900 0.900000 0.000000 904 904 -1.100000 0.000000 904 nt=4 20.003 ms 291.556 GFLOPS
|
||||
cgemm_ C N N 1300 1300 1300 0.900000 0.000000 1304 1304 -1.100000 0.000000 1304 nt=4 56.253 ms 312.446 GFLOPS
|
||||
cgemm_ C N T 1700 1700 1700 0.900000 0.000000 1704 1704 -1.100000 0.000000 1704 nt=4 116.948 ms 336.081 GFLOPS
|
||||
cgemm_ C T N 2100 2100 2100 0.900000 0.000000 2104 2104 -1.100000 0.000000 2104 nt=4 207.581 ms 356.911 GFLOPS
|
||||
cgemm_ C T T 2500 2500 2500 0.900000 0.000000 2504 2504 -1.100000 0.000000 2504 nt=4 346.031 ms 361.239 GFLOPS
|
||||
sgemm_ S N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000 nt=4 1024.360 ms 11.715 GFLOPS
|
||||
sgemm_ S N N 100 100 100 0.900000 0.000000 104 104 -1.100000 0.000000 104 nt=4 0.362 ms 5.525 GFLOPS
|
||||
sgemm_ S N N 500 500 500 0.900000 0.000000 504 504 -1.100000 0.000000 504 nt=4 1.688 ms 148.104 GFLOPS
|
||||
sgemm_ S N N 900 900 900 0.900000 0.000000 904 904 -1.100000 0.000000 904 nt=4 147.791 ms 9.865 GFLOPS
|
||||
sgemm_ S N N 1300 1300 1300 0.900000 0.000000 1304 1304 -1.100000 0.000000 1304 nt=4 451.156 ms 9.739 GFLOPS
|
||||
sgemm_ S N T 1700 1700 1700 0.900000 0.000000 1704 1704 -1.100000 0.000000 1704 nt=4 873.577 ms 11.248 GFLOPS
|
||||
sgemm_ S T N 2100 2100 2100 0.900000 0.000000 2104 2104 -1.100000 0.000000 2104 nt=4 1699.278 ms 10.900 GFLOPS
|
||||
sgemm_ S T T 2500 2500 2500 0.900000 0.000000 2504 2504 -1.100000 0.000000 2504 nt=4 2651.917 ms 11.784 GFLOPS
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
Dt n incx incy gflops
|
||||
isamax_:183: S 100 1 29 0.043
|
||||
isamax_:183: S 200 1 65 0.065
|
||||
isamax_:183: S 300 1 185 0.078
|
||||
isamax_:183: S 400 1 86 0.261
|
||||
isamax_:183: S 500 1 271 0.279
|
||||
idamax_:183: D 100 1 64 0.099
|
||||
idamax_:183: D 200 1 175 0.131
|
||||
idamax_:183: D 300 1 102 0.148
|
||||
idamax_:183: D 400 1 249 0.157
|
||||
idamax_:183: D 500 1 197 0.165
|
||||
icamax_:183: C 100 1 1 0.185
|
||||
icamax_:183: C 200 1 108 0.242
|
||||
icamax_:183: C 300 1 76 0.271
|
||||
icamax_:183: C 400 1 178 0.283
|
||||
icamax_:183: C 500 1 403 0.304
|
||||
izamax_:183: Z 100 1 51 0.178
|
||||
izamax_:183: Z 200 1 175 0.232
|
||||
izamax_:183: Z 300 1 240 0.260
|
||||
izamax_:183: Z 400 1 108 0.293
|
||||
izamax_:183: Z 500 1 411 0.294
|
||||
@@ -46,7 +46,6 @@ err_t bli_gemmsup
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2);
|
||||
// AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_2, alpha, a, b, beta, c);
|
||||
|
||||
// Return early if small matrix handling is disabled at configure-time.
|
||||
#ifdef BLIS_DISABLE_SUP_HANDLING
|
||||
|
||||
@@ -46,7 +46,6 @@ err_t bli_gemmsup_ref
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3);
|
||||
// AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_3, alpha, a, b, beta, c);
|
||||
// This function implements the default gemmsup handler. If you are a
|
||||
// BLIS developer and wish to use a different gemmsup handler, please
|
||||
// register a different function pointer in the context in your
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -48,7 +48,6 @@ void bli_gemm_front
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3);
|
||||
// AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_3, alpha, a, b, beta, c);
|
||||
bli_init_once();
|
||||
|
||||
obj_t a_local;
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -54,8 +54,7 @@ void bli_gemm_int
|
||||
gemm_var_oft f;
|
||||
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4);
|
||||
// AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_4, alpha, a, b, beta, c);
|
||||
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_gemm_basic_check( alpha, a, b, beta, c, cntx );
|
||||
|
||||
@@ -65,9 +65,12 @@ void PASTEF77(ch,blasname) \
|
||||
inc_t rs_b, cs_b; \
|
||||
inc_t rs_c, cs_c; \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
\
|
||||
/* Perform BLAS parameter checking. */ \
|
||||
PASTEBLACHK(blasname) \
|
||||
@@ -118,6 +121,7 @@ void PASTEF77(ch,blasname) \
|
||||
NULL \
|
||||
); \
|
||||
\
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
@@ -142,18 +146,20 @@ void PASTEF77(ch,blasname) \
|
||||
ftype* c, const f77_int* ldc \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, (void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
\
|
||||
trans_t blis_transa; \
|
||||
trans_t blis_transb; \
|
||||
dim_t m0, n0, k0; \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_INFO) \
|
||||
\
|
||||
dim_t m0_a, n0_a; \
|
||||
dim_t m0_b, n0_b; \
|
||||
\
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *transa, *transb, *m, *n, *k, \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc); \
|
||||
\
|
||||
/* Perform BLAS parameter checking. */ \
|
||||
PASTEBLACHK(blasname) \
|
||||
@@ -217,6 +223,7 @@ void PASTEF77(ch,blasname) \
|
||||
NULL \
|
||||
); \
|
||||
} \
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k); \
|
||||
return; \
|
||||
} \
|
||||
else if( m0 == 1 ) \
|
||||
@@ -249,6 +256,7 @@ void PASTEF77(ch,blasname) \
|
||||
NULL \
|
||||
); \
|
||||
} \
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
@@ -284,7 +292,8 @@ void PASTEF77(ch,blasname) \
|
||||
NULL \
|
||||
); \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
}
|
||||
@@ -306,15 +315,19 @@ void dgemm_
|
||||
double* c, const f77_int* ldc
|
||||
)
|
||||
{
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, 'D', *transa, *transb, *m, *n, *k, (void*)alpha, *lda, *ldb, (void*)beta, *ldc);
|
||||
|
||||
|
||||
|
||||
trans_t blis_transa;
|
||||
trans_t blis_transb;
|
||||
dim_t m0, n0, k0;
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_INFO)
|
||||
|
||||
/* Initialize BLIS. */
|
||||
bli_init_auto();
|
||||
/* Initialize BLIS. */
|
||||
bli_init_auto();
|
||||
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1)
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(d), *transa, *transb, *m, *n, *k, \
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc);
|
||||
|
||||
/* Perform BLAS parameter checking. */
|
||||
PASTEBLACHK(gemm)
|
||||
@@ -358,7 +371,8 @@ void dgemm_
|
||||
(double*)beta,
|
||||
c, *ldc
|
||||
);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS */
|
||||
bli_finalize_auto();
|
||||
|
||||
@@ -395,6 +409,9 @@ void dgemm_
|
||||
((void*)0)
|
||||
);
|
||||
}
|
||||
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
|
||||
return;
|
||||
}
|
||||
else if (m0 == 1)
|
||||
@@ -427,6 +444,7 @@ void dgemm_
|
||||
((void*)0)
|
||||
);
|
||||
}
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -478,8 +496,9 @@ void dgemm_
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
@@ -519,7 +538,8 @@ void dgemm_
|
||||
|
||||
if (status == BLIS_SUCCESS)
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
|
||||
@@ -532,7 +552,8 @@ void dgemm_
|
||||
err_t status = bli_gemmsup(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
if (status == BLIS_SUCCESS)
|
||||
{
|
||||
return;
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
return;
|
||||
}
|
||||
|
||||
// fall back on native path when dgemm is not handled in sup path.
|
||||
@@ -550,7 +571,8 @@ void dgemm_
|
||||
/* NULL */
|
||||
/* ); */
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
} // end of dgemm_
|
||||
@@ -569,15 +591,16 @@ void zgemm_
|
||||
dcomplex* c, const f77_int* ldc
|
||||
)
|
||||
{
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, 'Z', *transa, *transb, *m, *n, *k, (void*)alpha, *lda, *ldb, (void*)beta, *ldc);
|
||||
|
||||
trans_t blis_transa;
|
||||
trans_t blis_transb;
|
||||
dim_t m0, n0, k0;
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_INFO)
|
||||
|
||||
/* Initialize BLIS. */
|
||||
bli_init_auto();
|
||||
/* Initialize BLIS. */
|
||||
bli_init_auto();
|
||||
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1)
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *transa, *transb, *m, *n, *k,
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc);
|
||||
|
||||
/* Perform BLAS parameter checking. */
|
||||
PASTEBLACHK(gemm)
|
||||
@@ -655,11 +678,12 @@ void zgemm_
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
}
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
}
|
||||
|
||||
// The code below will be called when number of threads = 1.
|
||||
#if ENABLE_INDUCED_METHOD
|
||||
@@ -686,7 +710,8 @@ void zgemm_
|
||||
//sqp algo is found better for n > 40
|
||||
if(bli_gemm_sqp(&alphao, &ao, &bo, &betao, &co, NULL, NULL)==BLIS_SUCCESS)
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO)
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -699,17 +724,20 @@ void zgemm_
|
||||
err_t status = bli_gemmsup(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
if(status==BLIS_SUCCESS)
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO)
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
// fall back on native path when zgemm is not handled in sup path.
|
||||
bli_gemmnat(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO)
|
||||
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
return;
|
||||
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO)
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
}// end of zgemm_
|
||||
@@ -738,15 +766,16 @@ void dzgemm_
|
||||
)
|
||||
{
|
||||
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, 'Z', *transa, *transb, *m, *n, *k, (void*)alpha, *lda, *ldb, (void*)beta, *ldc);
|
||||
|
||||
trans_t blis_transa;
|
||||
trans_t blis_transb;
|
||||
dim_t m0, n0, k0;
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_INFO)
|
||||
|
||||
/* Initialize BLIS. */
|
||||
bli_init_auto();
|
||||
/* Initialize BLIS. */
|
||||
bli_init_auto();
|
||||
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1)
|
||||
AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(z), *transa, *transb, *m, *n, *k,
|
||||
(void*)alpha, *lda, *ldb, (void*)beta, *ldc);
|
||||
|
||||
/* Perform BLAS parameter checking. */
|
||||
PASTEBLACHK(gemm)
|
||||
@@ -808,7 +837,8 @@ void dzgemm_
|
||||
// fall back on native path when zgemm is not handled in sup path.
|
||||
bli_gemmnat(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO)
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
/* Finalize BLIS. */
|
||||
bli_finalize_auto();
|
||||
}// end of dzgemm_
|
||||
|
||||
Reference in New Issue
Block a user