Added debug trace and log support for copy and ger routines

Change-Id: Id7fb64c0a626b2f8f53e89ee7df4391693eb4f4c
This commit is contained in:
Meghana Vankadari
2020-10-30 16:49:59 +05:30
parent 65daaab6ac
commit 0775f09b41
13 changed files with 461 additions and 115 deletions

View File

@@ -418,5 +418,65 @@ void AOCL_DTL_log_gemv_sizes( int8 loglevel,
}
/*
 * Logs the inputs of a GER call as a single text record.
 *
 * The record has the form
 *     "<dt_type> <m> <n> <alpha_real> <alpha_imag> <incx> <incy> <lda>"
 * and is handed to DTL_Trace() as a TRACE_TYPE_LOG entry.
 *
 * loglevel      - level forwarded unchanged to DTL_Trace().
 * dt_type       - datatype character; 's'/'d'/'c'/'z' (either case) select how
 *                 alpha is read (float, double, scomplex, dcomplex).
 * m, n          - dimensions, printed as integers.
 * alpha         - pointer to the scalar; interpreted according to dt_type.
 *                 For an unrecognized dt_type it is not dereferenced and
 *                 both alpha components are logged as 0.
 * incx, incy    - vector increments, printed as integers.
 * lda           - leading dimension, printed as an integer.
 * filename      - currently unused (see note at the DTL_Trace() call).
 * function_name - name of the calling function, forwarded to DTL_Trace().
 * line          - source line of the call site, forwarded to DTL_Trace().
 */
void AOCL_DTL_log_ger_sizes( int8 loglevel,
                             char dt_type,
                             const f77_int m,
                             const f77_int n,
                             const void* alpha,
                             const f77_int incx,
                             const f77_int incy,
                             const f77_int lda,
                             const char* filename,
                             const char* function_name,
                             int line
                           )
{
    char buffer[256];

    /* Start from zero so an unrecognized dt_type never formats
       indeterminate values (reading them would be undefined behavior). */
    double alpha_real = 0.0;
    double alpha_imag = 0.0;

    /* NOTE(review): filename is accepted but not used; function_name is
       passed in both string slots of DTL_Trace() below. This mirrors the
       original call - confirm that it is intentional. */
    (void)filename;

    if(dt_type == 's' || dt_type == 'S' )
    {
        alpha_real = *(const float*)alpha;
    }
    else if(dt_type == 'd' || dt_type == 'D' )
    {
        alpha_real = *(const double*)alpha;
    }
    else if(dt_type == 'c' || dt_type == 'C' )
    {
        alpha_real = ((const scomplex*)alpha)->real;
        alpha_imag = ((const scomplex*)alpha)->imag;
    }
    else if(dt_type == 'z' || dt_type == 'Z' )
    {
        alpha_real = ((const dcomplex*)alpha)->real;
        alpha_imag = ((const dcomplex*)alpha)->imag;
    }

    /* Bounded formatting: output is truncated rather than overrunning buffer.
       Integers are widened to long long so the conversion specifier matches
       the argument type regardless of how wide f77_int is. */
    snprintf(buffer, sizeof(buffer), "%c %lld %lld %lf %lf %lld %lld %lld",
             dt_type,
             (long long)m, (long long)n,
             alpha_real, alpha_imag,
             (long long)incx, (long long)incy, (long long)lda);

    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
}
// Level-1
/*
 * Logs the inputs of a COPY call as a single text record.
 *
 * The record has the form "<dt_type> <n> <incx> <incy>" and is handed to
 * DTL_Trace() as a TRACE_TYPE_LOG entry.
 *
 * loglevel      - level forwarded unchanged to DTL_Trace().
 * dt_type       - datatype character, printed as-is (callers pass S, D, C or Z).
 * n             - vector length, printed as an integer.
 * incx, incy    - vector increments, printed as integers.
 * filename      - currently unused (see note in the body).
 * function_name - name of the calling function, forwarded to DTL_Trace().
 * line          - source line of the call site, forwarded to DTL_Trace().
 */
void AOCL_DTL_log_copy_sizes( int8 loglevel,
                              char dt_type,
                              const f77_int n,
                              const f77_int incx,
                              const f77_int incy,
                              const char* filename,
                              const char* function_name,
                              int line
                            )
{
    char buffer[256];

    /* NOTE(review): filename is accepted but not used; function_name is
       passed in both string slots of DTL_Trace() below. This mirrors the
       original call - confirm that it is intentional. */
    (void)filename;

    // {S, D, C, Z} {n, incx, incy}
    /* Bounded formatting; integers are widened to long long so the
       conversion specifier matches the argument type regardless of how
       wide f77_int is. */
    snprintf(buffer, sizeof(buffer), "%c %lld %lld %lld",
             dt_type, (long long)n, (long long)incx, (long long)incy);

    DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
}
#endif

View File

@@ -59,6 +59,30 @@ void AOCL_DTL_log_gemv_sizes( int8 loglevel,
const char* function_name,
int line);
/* Logs the inputs of a GER call: formats dt_type, m, n, the real and
   imaginary parts of *alpha, incx, incy and lda into one line and passes it
   to DTL_Trace() as a TRACE_TYPE_LOG record. alpha is read as float, double,
   scomplex or dcomplex depending on dt_type ('s', 'd', 'c', 'z', either case).
   Normally invoked through AOCL_DTL_LOG_GER_INPUTS, which supplies filename,
   function_name and line. */
void AOCL_DTL_log_ger_sizes( int8 loglevel,
char dt_type,
const f77_int m,
const f77_int n,
const void* alpha,
const f77_int incx,
const f77_int incy,
const f77_int lda,
const char* filename,
const char* function_name,
int line
);
// Level-1
/* Logs the inputs of a COPY call: formats dt_type, n, incx and incy into one
   line and passes it to DTL_Trace() as a TRACE_TYPE_LOG record.
   Normally invoked through AOCL_DTL_LOG_COPY_INPUTS, which supplies filename,
   function_name and line. */
void AOCL_DTL_log_copy_sizes( int8 loglevel,
char dt_type,
const f77_int n,
const f77_int incx,
const f77_int incy,
const char* filename,
const char* function_name,
int line
);
#define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, alpha, a, b, beta, c) \
AOCL_DTL_log_gemm_sizes(loglevel, alpha, a, b, beta, c, __FILE__, __FUNCTION__, __LINE__);
@@ -78,6 +102,12 @@ void AOCL_DTL_log_gemv_sizes( int8 loglevel,
#define AOCL_DTL_LOG_GEMV_INPUTS(loglevel, dt_type, transa, m, n, alp, lda, incx, beta, incy) \
AOCL_DTL_log_gemv_sizes(loglevel, dt_type, transa, m, n, alp, lda, incx, beta, incy, __FILE__,\
__FUNCTION__, __LINE__);
/* Forwards to AOCL_DTL_log_ger_sizes(), appending the call site's __FILE__,
   __FUNCTION__ and __LINE__. The expansion already ends with ';', so it is
   used as a statement without a trailing semicolon. */
#define AOCL_DTL_LOG_GER_INPUTS(loglevel, dt_type, m, n, alpha, incx, incy, lda) \
AOCL_DTL_log_ger_sizes(loglevel, dt_type, m, n, alpha, incx, incy, lda, __FILE__, __FUNCTION__, __LINE__);
// Level-1 Macros
/* Forwards to AOCL_DTL_log_copy_sizes(), appending the call site's __FILE__,
   __FUNCTION__ and __LINE__. The expansion already ends with ';', so it is
   used as a statement without a trailing semicolon. */
#define AOCL_DTL_LOG_COPY_INPUTS(loglevel, dt_type, n, incx, incy) \
AOCL_DTL_log_copy_sizes(loglevel, dt_type, n, incx, incy, __FILE__, __FUNCTION__, __LINE__);
#else
@@ -93,6 +123,10 @@ void AOCL_DTL_log_gemv_sizes( int8 loglevel,
#define AOCL_DTL_LOG_GEMV_INPUTS(loglevel, dt_type, transa, m, n, alp, lda, incx, beta, incy)
/* Empty expansions of the GER and COPY logging macros: call sites compile to
   nothing here. NOTE(review): presumably this is the branch taken when DTL
   logging is disabled - confirm against the enclosing #if condition. */
#define AOCL_DTL_LOG_GER_INPUTS(loglevel, dt_type, m, n, alpha, incx, incy, lda)
#define AOCL_DTL_LOG_COPY_INPUTS(loglevel, dt_type, n, incx, incy)
#endif

View File

@@ -53,6 +53,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -72,6 +74,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
y, incy, \
cntx \
); \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( addv, BLIS_ADDV_KER )
@@ -90,6 +94,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -108,6 +114,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
index, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( amaxv, BLIS_AMAXV_KER )
@@ -127,6 +134,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -148,6 +157,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
y, incy, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER )
@@ -166,6 +176,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -187,6 +199,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
y, incy, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( axpyv, BLIS_AXPYV_KER )
@@ -207,6 +220,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -228,6 +243,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
rho, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( dotv, BLIS_DOTV_KER )
@@ -249,6 +265,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -272,6 +290,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
rho, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( dotxv, BLIS_DOTXV_KER )
@@ -287,6 +306,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -304,6 +325,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
x, incx, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( invertv, BLIS_INVERTV_KER )
@@ -321,6 +343,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -340,6 +364,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
x, incx, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( scalv, BLIS_SCALV_KER )
@@ -357,6 +382,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -375,6 +402,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
y, incy, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( swapv, BLIS_SWAPV_KER )
@@ -392,6 +420,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -412,6 +442,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
y, incy, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER )

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -57,6 +58,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -67,7 +70,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \
bli_set_dims_with_trans( transa, m, n, &m_y, &n_x ); \
\
/* If y has zero elements, return early. */ \
if ( bli_zero_dim1( m_y ) ) return; \
if ( bli_zero_dim1( m_y ) ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
/* Obtain a valid context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
@@ -85,6 +92,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
cntx, \
NULL \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
@@ -118,6 +126,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
y, incy, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC3( gemv, gemv, gemv_unf_var1, gemv_unf_var2 )
@@ -139,12 +148,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
\
/* If x or y has zero elements, or if alpha is zero, return early. */ \
if ( bli_zero_dim2( m, n ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \
if ( bli_zero_dim2( m, n ) || PASTEMAC(ch,eq0)( *alpha ) ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
/* Obtain a valid context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
@@ -170,6 +185,9 @@ void PASTEMAC2(ch,opname,EX_SUF) \
a, rs_a, cs_a, \
cntx \
); \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
\
}
INSERT_GENTFUNC_BASIC3( ger, ger, ger_unb_var1, ger_unb_var2 )
@@ -192,6 +210,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -212,6 +232,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
cntx, \
NULL \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
@@ -246,6 +267,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
y, incy, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC4( hemv, hemv, BLIS_CONJUGATE, hemv_unf_var1, hemv_unf_var3 )
@@ -266,6 +288,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
@@ -273,7 +297,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \
ctype alpha_local; \
\
/* If x has zero elements, or if alpha is zero, return early. */ \
if ( bli_zero_dim1( m ) || PASTEMAC(chr,eq0)( *alpha ) ) return; \
if ( bli_zero_dim1( m ) || PASTEMAC(chr,eq0)( *alpha ) ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
/* Make a local copy of alpha, cast into the complex domain. This
allows us to use the same underlying her variants to implement
@@ -311,6 +339,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
a, rs_a, cs_a, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNCR_BASIC4( her, her, BLIS_CONJUGATE, her_unb_var1, her_unb_var2 )
@@ -330,12 +359,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
\
/* If x has zero elements, or if alpha is zero, return early. */ \
if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \
if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
/* Obtain a valid context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
@@ -368,6 +403,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
a, rs_a, cs_a, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC4( syr, her, BLIS_NO_CONJUGATE, her_unb_var1, her_unb_var2 )
@@ -389,12 +425,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
\
/* If x has zero elements, or if alpha is zero, return early. */ \
if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \
if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
/* Obtain a valid context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
@@ -429,6 +471,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
a, rs_a, cs_a, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC4( her2, her2, BLIS_CONJUGATE, her2_unf_var1, her2_unf_var4 )
@@ -450,12 +493,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \
BLIS_TAPI_EX_PARAMS \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
\
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
\
/* If x has zero elements, return early. */ \
if ( bli_zero_dim1( m ) ) return; \
if ( bli_zero_dim1( m ) ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
/* Obtain a valid context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
@@ -472,6 +521,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
cntx, \
NULL \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
return; \
} \
\
@@ -503,6 +553,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
x, incx, \
cntx \
); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
}
INSERT_GENTFUNC_BASIC3( trmv, trmv, trmv_unf_var1, trmv_unf_var2 )

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -50,6 +51,8 @@ void PASTEMAC(ch,varname) \
cntx_t* cntx \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3) \
\
const num_t dt = PASTEMAC(ch,type); \
\
ctype* a1t; \
@@ -83,6 +86,8 @@ void PASTEMAC(ch,varname) \
cntx \
); \
} \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3) \
\
}
INSERT_GENTFUNC_BASIC0( ger_unb_var1 )

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -50,6 +51,8 @@ void PASTEMAC(ch,varname) \
cntx_t* cntx \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3); \
\
const num_t dt = PASTEMAC(ch,type); \
\
ctype* a1; \
@@ -83,6 +86,8 @@ void PASTEMAC(ch,varname) \
cntx \
); \
} \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3) \
\
}
INSERT_GENTFUNC_BASIC0( ger_unb_var2 )

View File

@@ -54,6 +54,9 @@ void PASTEF77(ch,blasname) \
ftype* y0; \
inc_t incx0; \
inc_t incy0; \
\
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
AOCL_DTL_LOG_COPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *n, *incx, *incy) \
\
/* Initialize BLIS. */ \
bli_init_auto(); \
@@ -77,6 +80,9 @@ void PASTEF77(ch,blasname) \
NULL \
); \
\
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
\
/* Finalize BLIS. */ \
bli_finalize_auto(); \
}
@@ -97,6 +103,8 @@ void scopy_
inc_t incx0;
inc_t incy0;
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1)
AOCL_DTL_LOG_COPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, 'S', *n, *incx, *incy)
/* Initialize BLIS. */
// bli_init_auto();
@@ -156,6 +164,7 @@ void scopy_
NULL
);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
/* Finalize BLIS. */
// bli_finalize_auto();
}
@@ -173,6 +182,8 @@ void dcopy_
inc_t incx0;
inc_t incy0;
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
AOCL_DTL_LOG_COPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, 'D', *n, *incx, *incy)
/* Initialize BLIS. */
// bli_init_auto();
@@ -232,6 +243,7 @@ void dcopy_
NULL
);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
/* Finalize BLIS. */
// bli_finalize_auto();
}

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -51,6 +52,9 @@ void PASTEF772(ch,blasname,chc) \
ftype* a, const f77_int* lda \
) \
{ \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
AOCL_DTL_LOG_GER_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, (void*)alpha, *incx, *incy, *lda) \
\
dim_t m0, n0; \
ftype* x0; \
ftype* y0; \
@@ -101,6 +105,8 @@ void PASTEF772(ch,blasname,chc) \
NULL, \
NULL \
); \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
\
/* Finalize BLIS. */ \
bli_finalize_auto(); \

View File

@@ -65,6 +65,8 @@ void bli_saxpyv_zen_int10
cntx_t* restrict cntx
)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4)
const dim_t n_elem_per_reg = 8;
dim_t i;
@@ -78,7 +80,11 @@ void bli_saxpyv_zen_int10
__m256 zv[10];
// If the vector dimension is zero, or if alpha is zero, return early.
if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return;
if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
return;
}
// Initialize local pointers.
x0 = x;
@@ -257,6 +263,7 @@ void bli_saxpyv_zen_int10
y0 += incy;
}
}
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
}
// -----------------------------------------------------------------------------
@@ -271,6 +278,8 @@ void bli_daxpyv_zen_int10
cntx_t* restrict cntx
)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4)
const dim_t n_elem_per_reg = 4;
dim_t i;
@@ -284,7 +293,11 @@ void bli_daxpyv_zen_int10
__m256d zv[10];
// If the vector dimension is zero, or if alpha is zero, return early.
if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return;
if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
return;
}
// Initialize local pointers.
x0 = x;
@@ -463,6 +476,7 @@ void bli_daxpyv_zen_int10
y0 += incy;
}
}
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
}
// -----------------------------------------------------------------------------
@@ -477,6 +491,8 @@ void bli_caxpyv_zen_int5
cntx_t* restrict cntx
)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4)
const dim_t n_elem_per_reg = 8;
dim_t i;
@@ -497,7 +513,11 @@ void bli_caxpyv_zen_int5
conj_t conjx_use = conjx;
// If the vector dimension is zero, or if alpha is zero, return early.
if ( bli_zero_dim1( n ) || PASTEMAC(c,eq0)( *alpha ) ) return;
if ( bli_zero_dim1( n ) || PASTEMAC(c,eq0)( *alpha ) )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
return;
}
// Initialize local pointers.
x0 = (float*)x;
@@ -756,6 +776,7 @@ void bli_caxpyv_zen_int5
}
}
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
}
// -----------------------------------------------------------------------------
@@ -770,6 +791,8 @@ void bli_zaxpyv_zen_int5
cntx_t* restrict cntx
)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4)
const dim_t n_elem_per_reg = 4;
dim_t i;
@@ -789,7 +812,11 @@ void bli_zaxpyv_zen_int5
conj_t conjx_use = conjx;
// If the vector dimension is zero, or if alpha is zero, return early.
if ( bli_zero_dim1( n ) || PASTEMAC(z,eq0)( *alpha ) ) return;
if ( bli_zero_dim1( n ) || PASTEMAC(z,eq0)( *alpha ) )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
return;
}
// Initialize local pointers.
x0 = (double*)x;
@@ -1057,4 +1084,5 @@ void bli_zaxpyv_zen_int5
}
}
}
}
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
}

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc.
Copyright (C) 2019-2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -46,12 +46,18 @@ void bli_scopyv_zen_int
cntx_t* restrict cntx
)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2)
const dim_t num_elem_per_reg = 8;
__m256 xv[16];
dim_t i = 0;
// If the vector dimension is zero return early.
if (bli_zero_dim1(n)) return;
if (bli_zero_dim1(n))
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
return;
}
if (incx == 1 && incy == 1)
{
@@ -182,6 +188,7 @@ void bli_scopyv_zen_int
y += incy;
}
}
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
}
// -----------------------------------------------------------------------------
@@ -195,12 +202,17 @@ void bli_dcopyv_zen_int
cntx_t* restrict cntx
)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2)
const dim_t num_elem_per_reg = 4;
__m256d xv[16];
dim_t i = 0;
// If the vector dimension is zero return early.
if (bli_zero_dim1(n)) return;
if (bli_zero_dim1(n))
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
return;
}
if (incx == 1 && incy == 1)
{
@@ -326,5 +338,6 @@ void bli_dcopyv_zen_int
y += incy;
}
}
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
}

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -46,7 +47,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
cntx_t* restrict cntx \
) \
{ \
if ( bli_zero_dim1( n ) ) return; \
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3) \
\
if ( bli_zero_dim1( n ) ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3) \
return; \
} \
\
if ( bli_is_conj( conjx ) ) \
{ \
@@ -90,6 +97,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
} \
} \
} \
\
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3) \
\
}
INSERT_GENTFUNC_BASIC2( copyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc.
Copyright (C) 2019-2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -165,6 +165,32 @@ int main(int argc, char** argv)
yp, &incy
);
}
else if (bli_is_scomplex(dt))
{
f77_int nn = bli_obj_length(&x);
f77_int incx = bli_obj_vector_inc(&x);
scomplex* xp = bli_obj_buffer(&x);
f77_int incy = bli_obj_vector_inc(&y);
scomplex* yp = bli_obj_buffer(&y);
ccopy_( &nn,
xp, &incx,
yp, &incy
);
}
else if(bli_is_dcomplex(dt))
{
f77_int nn = bli_obj_length(&x);
f77_int incx = bli_obj_vector_inc(&x);
dcomplex* xp = bli_obj_buffer(&x);
f77_int incy = bli_obj_vector_inc(&y);
dcomplex* yp = bli_obj_buffer(&y);
zcopy_( &nn,
xp, &incx,
yp, &incy
);
}
#endif
dtime_save = bli_clock_min_diff(dtime_save, dtime);
#ifdef BLIS_ACCURACY_TEST
@@ -199,6 +225,7 @@ int main(int argc, char** argv)
if (p >= 10000)
p_inc = 10000;
Gbps = (n * sizeof_dt) / (dtime_save * 1.0e9);
if(bli_is_complex(dt)) Gbps *= 2;
#ifdef BLIS
printf("data_copyv_blis\t");
#else

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -46,155 +47,218 @@
int main( int argc, char** argv )
{
obj_t a, x, y;
obj_t a_save;
obj_t alpha;
dim_t m, n;
dim_t p;
dim_t p_begin, p_end, p_inc;
int m_input, n_input;
num_t dt_a, dt_x, dt_y;
num_t dt_alpha;
int r, n_repeats;
obj_t a, x, y;
obj_t a_save;
obj_t alpha;
dim_t m, n;
dim_t p;
dim_t p_begin, p_end, p_inc;
int m_input, n_input;
num_t dt_a, dt_x, dt_y, dt;
num_t dt_alpha;
int r, n_repeats;
double dtime;
double dtime_save;
double gflops;
double dtime;
double dtime_save;
double gflops;
//bli_init();
//bli_init();
n_repeats = 3;
n_repeats = 3;
#ifndef PRINT
p_begin = 40;
p_end = 2000;
p_inc = 40;
p_begin = 40;
p_end = 4000;
p_inc = 40;
m_input = -1;
n_input = -1;
m_input = -1;
n_input = -1;
#else
p_begin = 16;
p_end = 16;
p_inc = 1;
p_begin = 16;
p_end = 16;
p_inc = 1;
m_input = 15;
n_input = 15;
m_input = 15;
n_input = 15;
#endif
dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE;
dt = dt_alpha = dt_x = dt_y = dt_a = BLIS_FLOAT;
// Begin with initializing the last entry to zero so that
// matlab allocates space for the entire array once up-front.
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
// Begin with initializing the last entry to zero so that
// matlab allocates space for the entire array once up-front.
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
printf( "data_ger_blis" );
printf( "data_ger_blis" );
#else
printf( "data_ger_%s", BLAS );
printf( "data_ger_%s", BLAS );
#endif
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
( unsigned long )(p - p_begin)/p_inc + 1,
( unsigned long )0,
( unsigned long )0, 0.0 );
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
( unsigned long )(p - p_begin)/p_inc + 1,
( unsigned long )0,
( unsigned long )0, 0.0 );
//for ( p = p_begin; p <= p_end; p += p_inc )
for ( p = p_end; p_begin <= p; p -= p_inc )
{
//for ( p = p_begin; p <= p_end; p += p_inc )
for ( p = p_end; p_begin <= p; p -= p_inc )
{
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
else m = ( dim_t ) m_input;
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
else n = ( dim_t ) n_input;
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
else m = ( dim_t ) m_input;
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
else n = ( dim_t ) n_input;
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bli_obj_create( dt_x, m, 1, 0, 0, &x );
bli_obj_create( dt_y, n, 1, 0, 0, &y );
bli_obj_create( dt_a, m, n, 0, 0, &a );
bli_obj_create( dt_a, m, n, 0, 0, &a_save );
bli_obj_create( dt_x, m, 1, 0, 0, &x );
bli_obj_create( dt_y, n, 1, 0, 0, &y );
bli_obj_create( dt_a, m, n, 0, 0, &a );
bli_obj_create( dt_a, m, n, 0, 0, &a_save );
bli_randm( &x );
bli_randm( &y );
bli_randm( &a );
bli_randm( &x );
bli_randm( &y );
bli_randm( &a );
bli_setsc( (2.0/1.0), 0.0, &alpha );
bli_setsc( (0.9/1.0), -1.1, &alpha );
bli_copym( &a, &a_save );
dtime_save = DBL_MAX;
bli_copym( &a, &a_save );
dtime_save = DBL_MAX;
for ( r = 0; r < n_repeats; ++r )
{
bli_copym( &a_save, &a );
for ( r = 0; r < n_repeats; ++r )
{
bli_copym( &a_save, &a );
dtime = bli_clock();
dtime = bli_clock();
#ifdef PRINT
bli_printm( "x", &x, "%4.1f", "" );
bli_printm( "y", &y, "%4.1f", "" );
bli_printm( "a", &a, "%4.1f", "" );
bli_printm( "x", &x, "%4.1f", "" );
bli_printm( "y", &y, "%4.1f", "" );
bli_printm( "a", &a, "%4.1f", "" );
#endif
#ifdef BLIS
bli_ger( &alpha,
&x,
&y,
&a );
bli_ger( &alpha,
&x,
&y,
&a );
#else
f77_int mm = bli_obj_length( &a );
f77_int nn = bli_obj_width( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
double* alphap = bli_obj_buffer( &alpha );
double* xp = bli_obj_buffer( &x );
double* yp = bli_obj_buffer( &y );
double* ap = bli_obj_buffer( &a );
if(bli_is_float(dt))
{
f77_int mm = bli_obj_length( &a );
f77_int nn = bli_obj_width( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
float* alphap = bli_obj_buffer( &alpha );
float* xp = bli_obj_buffer( &x );
float* yp = bli_obj_buffer( &y );
float* ap = bli_obj_buffer( &a );
sger_( &mm,
&nn,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
}
else if(bli_is_double(dt))
{
f77_int mm = bli_obj_length( &a );
f77_int nn = bli_obj_width( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
double* alphap = bli_obj_buffer( &alpha );
double* xp = bli_obj_buffer( &x );
double* yp = bli_obj_buffer( &y );
double* ap = bli_obj_buffer( &a );
dger_( &mm,
&nn,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
}
else if(bli_is_scomplex(dt))
{
f77_int mm = bli_obj_length( &a );
f77_int nn = bli_obj_width( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
scomplex* alphap = bli_obj_buffer( &alpha );
scomplex* xp = bli_obj_buffer( &x );
scomplex* yp = bli_obj_buffer( &y );
scomplex* ap = bli_obj_buffer( &a );
cgeru_( &mm,
&nn,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
}
else if(bli_is_dcomplex(dt))
{
f77_int mm = bli_obj_length( &a );
f77_int nn = bli_obj_width( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
dcomplex* alphap = bli_obj_buffer( &alpha );
dcomplex* xp = bli_obj_buffer( &x );
dcomplex* yp = bli_obj_buffer( &y );
dcomplex* ap = bli_obj_buffer( &a );
zgeru_( &mm,
&nn,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
}
dger_( &mm,
&nn,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
#endif
#ifdef PRINT
bli_printm( "a after", &a, "%4.1f", "" );
exit(1);
bli_printm( "a after", &a, "%4.1f", "" );
exit(1);
#endif
dtime_save = bli_clock_min_diff( dtime_save, dtime );
}
dtime_save = bli_clock_min_diff( dtime_save, dtime );
}
gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 );
gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 );
if(bli_is_complex(dt)) gflops *= 4.0;
#ifdef BLIS
printf( "data_ger_blis" );
printf( "data_ger_blis" );
#else
printf( "data_ger_%s", BLAS );
printf( "data_ger_%s", BLAS );
#endif
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
( unsigned long )(p - p_begin)/p_inc + 1,
( unsigned long )m,
( unsigned long )n, gflops );
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
( unsigned long )(p - p_begin)/p_inc + 1,
( unsigned long )m,
( unsigned long )n, gflops );
bli_obj_free( &alpha );
bli_obj_free( &alpha );
bli_obj_free( &x );
bli_obj_free( &y );
bli_obj_free( &a );
bli_obj_free( &a_save );
}
bli_obj_free( &x );
bli_obj_free( &y );
bli_obj_free( &a );
bli_obj_free( &a_save );
}
//bli_finalize();
//bli_finalize();
return 0;
return 0;
}