mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Added debug trace and log support for copy and ger routines
Change-Id: Id7fb64c0a626b2f8f53e89ee7df4391693eb4f4c
This commit is contained in:
@@ -418,5 +418,65 @@ void AOCL_DTL_log_gemv_sizes( int8 loglevel,
|
||||
|
||||
}
|
||||
|
||||
void AOCL_DTL_log_ger_sizes( int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const void* alpha,
|
||||
const f77_int incx,
|
||||
const f77_int incy,
|
||||
const f77_int lda,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line
|
||||
)
|
||||
{
|
||||
char buffer[256];
|
||||
double alpha_real, alpha_imag;
|
||||
|
||||
if(dt_type == 's' || dt_type == 'S' )
|
||||
{
|
||||
alpha_real = *(float*)alpha;
|
||||
alpha_imag = 0.0;
|
||||
}
|
||||
else if(dt_type == 'd' || dt_type == 'D' )
|
||||
{
|
||||
alpha_real = *(double*) alpha;
|
||||
alpha_imag = 0.0;
|
||||
}
|
||||
else if(dt_type == 'c' || dt_type == 'C' )
|
||||
{
|
||||
alpha_real = (float)(((scomplex*)alpha)->real);
|
||||
alpha_imag = (float)(((scomplex*)alpha)->imag);
|
||||
}
|
||||
else if(dt_type == 'z' || dt_type == 'Z' )
|
||||
{
|
||||
alpha_real = ((dcomplex*)alpha)->real;
|
||||
alpha_imag = ((dcomplex*)alpha)->imag;
|
||||
}
|
||||
|
||||
sprintf(buffer, "%c %ld %ld %lf %lf %ld %ld %ld", dt_type, (dim_t)m, (dim_t)n, alpha_real, alpha_imag, (dim_t)incx, (dim_t)incy, (dim_t)lda );
|
||||
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
|
||||
}
|
||||
// Level-1
|
||||
|
||||
void AOCL_DTL_log_copy_sizes( int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int n,
|
||||
const f77_int incx,
|
||||
const f77_int incy,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line
|
||||
)
|
||||
{
|
||||
char buffer[256];
|
||||
// {S, D, C, Z} {n, incx, incy}
|
||||
sprintf(buffer, "%c %ld %ld %ld", dt_type, (dim_t)n, (dim_t)incx, (dim_t)incy);
|
||||
|
||||
DTL_Trace(loglevel, TRACE_TYPE_LOG, function_name, function_name, line, buffer);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -59,6 +59,30 @@ void AOCL_DTL_log_gemv_sizes( int8 loglevel,
|
||||
const char* function_name,
|
||||
int line);
|
||||
|
||||
void AOCL_DTL_log_ger_sizes( int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int m,
|
||||
const f77_int n,
|
||||
const void* alpha,
|
||||
const f77_int incx,
|
||||
const f77_int incy,
|
||||
const f77_int lda,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line
|
||||
);
|
||||
|
||||
// Level-1
|
||||
void AOCL_DTL_log_copy_sizes( int8 loglevel,
|
||||
char dt_type,
|
||||
const f77_int n,
|
||||
const f77_int incx,
|
||||
const f77_int incy,
|
||||
const char* filename,
|
||||
const char* function_name,
|
||||
int line
|
||||
);
|
||||
|
||||
/* Forwards the GEMM operands to AOCL_DTL_log_gemm_sizes together with the
   call site's file, function and line. The expansion already ends in a
   semicolon. */
#define AOCL_DTL_LOG_GEMM_INPUTS(loglevel, alpha, a, b, beta, c)          \
    AOCL_DTL_log_gemm_sizes(loglevel, alpha, a, b, beta, c,               \
                            __FILE__, __FUNCTION__, __LINE__);
|
||||
|
||||
@@ -78,6 +102,12 @@ void AOCL_DTL_log_gemv_sizes( int8 loglevel,
|
||||
/* Forwards the GEMV parameters to AOCL_DTL_log_gemv_sizes together with the
   call site's file, function and line. The expansion already ends in a
   semicolon. */
#define AOCL_DTL_LOG_GEMV_INPUTS(loglevel, dt_type, transa, m, n, alp, lda, incx, beta, incy) \
    AOCL_DTL_log_gemv_sizes(loglevel, dt_type, transa, m, n, alp, lda, incx, beta, incy,      \
                            __FILE__, __FUNCTION__, __LINE__);
|
||||
/* Forwards the GER parameters to AOCL_DTL_log_ger_sizes together with the
   call site's file, function and line. The expansion already ends in a
   semicolon, so call sites may invoke it without one. */
#define AOCL_DTL_LOG_GER_INPUTS(loglevel, dt_type, m, n, alpha, incx, incy, lda)      \
    AOCL_DTL_log_ger_sizes(loglevel, dt_type, m, n, alpha, incx, incy, lda,           \
                           __FILE__, __FUNCTION__, __LINE__);
|
||||
|
||||
// Level-1 Macros
|
||||
/* Forwards the COPY parameters to AOCL_DTL_log_copy_sizes together with the
   call site's file, function and line. The expansion already ends in a
   semicolon, so call sites may invoke it without one. */
#define AOCL_DTL_LOG_COPY_INPUTS(loglevel, dt_type, n, incx, incy)        \
    AOCL_DTL_log_copy_sizes(loglevel, dt_type, n, incx, incy,             \
                            __FILE__, __FUNCTION__, __LINE__);
|
||||
|
||||
#else
|
||||
|
||||
@@ -93,6 +123,10 @@ void AOCL_DTL_log_gemv_sizes( int8 loglevel,
|
||||
|
||||
#define AOCL_DTL_LOG_GEMV_INPUTS(loglevel, dt_type, transa, m, n, alp, lda, incx, beta, incy)
|
||||
|
||||
/* No-op variant: expands to nothing, so all arguments are discarded and
   never evaluated. */
#define AOCL_DTL_LOG_GER_INPUTS(loglevel, dt_type, m, n, alpha, incx, incy, lda)
|
||||
|
||||
/* No-op variant: expands to nothing, so all arguments are discarded and
   never evaluated. */
#define AOCL_DTL_LOG_COPY_INPUTS(loglevel, dt_type, n, incx, incy)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -53,6 +53,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -72,6 +74,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( addv, BLIS_ADDV_KER )
|
||||
@@ -90,6 +94,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -108,6 +114,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
index, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( amaxv, BLIS_AMAXV_KER )
|
||||
@@ -127,6 +134,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -148,6 +157,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER )
|
||||
@@ -166,6 +176,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -187,6 +199,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( axpyv, BLIS_AXPYV_KER )
|
||||
@@ -207,6 +220,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -228,6 +243,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
rho, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( dotv, BLIS_DOTV_KER )
|
||||
@@ -249,6 +265,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -272,6 +290,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
rho, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( dotxv, BLIS_DOTXV_KER )
|
||||
@@ -287,6 +306,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -304,6 +325,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
x, incx, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( invertv, BLIS_INVERTV_KER )
|
||||
@@ -321,6 +343,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -340,6 +364,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
x, incx, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( scalv, BLIS_SCALV_KER )
|
||||
@@ -357,6 +382,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -375,6 +402,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( swapv, BLIS_SWAPV_KER )
|
||||
@@ -392,6 +420,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -412,6 +442,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER )
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -57,6 +58,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2); \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -67,7 +70,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
bli_set_dims_with_trans( transa, m, n, &m_y, &n_x ); \
|
||||
\
|
||||
/* If y has zero elements, return early. */ \
|
||||
if ( bli_zero_dim1( m_y ) ) return; \
|
||||
if ( bli_zero_dim1( m_y ) ) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Obtain a valid context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
@@ -85,6 +92,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
cntx, \
|
||||
NULL \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
@@ -118,6 +126,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC3( gemv, gemv, gemv_unf_var1, gemv_unf_var2 )
|
||||
@@ -139,12 +148,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
\
|
||||
/* If x or y has zero elements, or if alpha is zero, return early. */ \
|
||||
if ( bli_zero_dim2( m, n ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \
|
||||
if ( bli_zero_dim2( m, n ) || PASTEMAC(ch,eq0)( *alpha ) ) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Obtain a valid context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
@@ -170,6 +185,9 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
a, rs_a, cs_a, \
|
||||
cntx \
|
||||
); \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC3( ger, ger, ger_unb_var1, ger_unb_var2 )
|
||||
@@ -192,6 +210,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -212,6 +232,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
cntx, \
|
||||
NULL \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
@@ -246,6 +267,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC4( hemv, hemv, BLIS_CONJUGATE, hemv_unf_var1, hemv_unf_var3 )
|
||||
@@ -266,6 +288,8 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
@@ -273,7 +297,11 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
ctype alpha_local; \
|
||||
\
|
||||
/* If x has zero elements, or if alpha is zero, return early. */ \
|
||||
if ( bli_zero_dim1( m ) || PASTEMAC(chr,eq0)( *alpha ) ) return; \
|
||||
if ( bli_zero_dim1( m ) || PASTEMAC(chr,eq0)( *alpha ) ) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Make a local copy of alpha, cast into the complex domain. This
|
||||
allows us to use the same underlying her variants to implement
|
||||
@@ -311,6 +339,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
a, rs_a, cs_a, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCR_BASIC4( her, her, BLIS_CONJUGATE, her_unb_var1, her_unb_var2 )
|
||||
@@ -330,12 +359,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
\
|
||||
/* If x has zero elements, or if alpha is zero, return early. */ \
|
||||
if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \
|
||||
if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Obtain a valid context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
@@ -368,6 +403,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
a, rs_a, cs_a, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC4( syr, her, BLIS_NO_CONJUGATE, her_unb_var1, her_unb_var2 )
|
||||
@@ -389,12 +425,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
\
|
||||
/* If x has zero elements, or if alpha is zero, return early. */ \
|
||||
if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \
|
||||
if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Obtain a valid context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
@@ -429,6 +471,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
a, rs_a, cs_a, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC4( her2, her2, BLIS_CONJUGATE, her2_unf_var1, her2_unf_var4 )
|
||||
@@ -450,12 +493,18 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
\
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
\
|
||||
/* If x has zero elements, return early. */ \
|
||||
if ( bli_zero_dim1( m ) ) return; \
|
||||
if ( bli_zero_dim1( m ) ) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* Obtain a valid context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
@@ -472,6 +521,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
cntx, \
|
||||
NULL \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
@@ -503,6 +553,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
x, incx, \
|
||||
cntx \
|
||||
); \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2) \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC3( trmv, trmv, trmv_unf_var1, trmv_unf_var2 )
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -50,6 +51,8 @@ void PASTEMAC(ch,varname) \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3) \
|
||||
\
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* a1t; \
|
||||
@@ -83,6 +86,8 @@ void PASTEMAC(ch,varname) \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3) \
|
||||
\
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( ger_unb_var1 )
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -50,6 +51,8 @@ void PASTEMAC(ch,varname) \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3); \
|
||||
\
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* a1; \
|
||||
@@ -83,6 +86,8 @@ void PASTEMAC(ch,varname) \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3) \
|
||||
\
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( ger_unb_var2 )
|
||||
|
||||
@@ -54,6 +54,9 @@ void PASTEF77(ch,blasname) \
|
||||
ftype* y0; \
|
||||
inc_t incx0; \
|
||||
inc_t incy0; \
|
||||
\
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); \
|
||||
AOCL_DTL_LOG_COPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *n, *incx, *incy) \
|
||||
\
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
@@ -77,6 +80,9 @@ void PASTEF77(ch,blasname) \
|
||||
NULL \
|
||||
); \
|
||||
\
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
\
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
}
|
||||
@@ -97,6 +103,8 @@ void scopy_
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1)
|
||||
AOCL_DTL_LOG_COPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, 'S', *n, *incx, *incy)
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
@@ -156,6 +164,7 @@ void scopy_
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
}
|
||||
@@ -173,6 +182,8 @@ void dcopy_
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
|
||||
AOCL_DTL_LOG_COPY_INPUTS(AOCL_DTL_LEVEL_TRACE_1, 'D', *n, *incx, *incy)
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
@@ -232,6 +243,7 @@ void dcopy_
|
||||
NULL
|
||||
);
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1)
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -51,6 +52,9 @@ void PASTEF772(ch,blasname,chc) \
|
||||
ftype* a, const f77_int* lda \
|
||||
) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
AOCL_DTL_LOG_GER_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(ch), *m, *n, (void*)alpha, *incx, *incy, *lda) \
|
||||
\
|
||||
dim_t m0, n0; \
|
||||
ftype* x0; \
|
||||
ftype* y0; \
|
||||
@@ -101,6 +105,8 @@ void PASTEF772(ch,blasname,chc) \
|
||||
NULL, \
|
||||
NULL \
|
||||
); \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \
|
||||
\
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
|
||||
@@ -65,6 +65,8 @@ void bli_saxpyv_zen_int10
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4)
|
||||
|
||||
const dim_t n_elem_per_reg = 8;
|
||||
|
||||
dim_t i;
|
||||
@@ -78,7 +80,11 @@ void bli_saxpyv_zen_int10
|
||||
__m256 zv[10];
|
||||
|
||||
// If the vector dimension is zero, or if alpha is zero, return early.
|
||||
if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return;
|
||||
if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) )
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize local pointers.
|
||||
x0 = x;
|
||||
@@ -257,6 +263,7 @@ void bli_saxpyv_zen_int10
|
||||
y0 += incy;
|
||||
}
|
||||
}
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -271,6 +278,8 @@ void bli_daxpyv_zen_int10
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4)
|
||||
|
||||
const dim_t n_elem_per_reg = 4;
|
||||
|
||||
dim_t i;
|
||||
@@ -284,7 +293,11 @@ void bli_daxpyv_zen_int10
|
||||
__m256d zv[10];
|
||||
|
||||
// If the vector dimension is zero, or if alpha is zero, return early.
|
||||
if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return;
|
||||
if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) )
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize local pointers.
|
||||
x0 = x;
|
||||
@@ -463,6 +476,7 @@ void bli_daxpyv_zen_int10
|
||||
y0 += incy;
|
||||
}
|
||||
}
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -477,6 +491,8 @@ void bli_caxpyv_zen_int5
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4)
|
||||
|
||||
const dim_t n_elem_per_reg = 8;
|
||||
|
||||
dim_t i;
|
||||
@@ -497,7 +513,11 @@ void bli_caxpyv_zen_int5
|
||||
conj_t conjx_use = conjx;
|
||||
|
||||
// If the vector dimension is zero, or if alpha is zero, return early.
|
||||
if ( bli_zero_dim1( n ) || PASTEMAC(c,eq0)( *alpha ) ) return;
|
||||
if ( bli_zero_dim1( n ) || PASTEMAC(c,eq0)( *alpha ) )
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize local pointers.
|
||||
x0 = (float*)x;
|
||||
@@ -756,6 +776,7 @@ void bli_caxpyv_zen_int5
|
||||
}
|
||||
|
||||
}
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -770,6 +791,8 @@ void bli_zaxpyv_zen_int5
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_4)
|
||||
|
||||
const dim_t n_elem_per_reg = 4;
|
||||
|
||||
dim_t i;
|
||||
@@ -789,7 +812,11 @@ void bli_zaxpyv_zen_int5
|
||||
conj_t conjx_use = conjx;
|
||||
|
||||
// If the vector dimension is zero, or if alpha is zero, return early.
|
||||
if ( bli_zero_dim1( n ) || PASTEMAC(z,eq0)( *alpha ) ) return;
|
||||
if ( bli_zero_dim1( n ) || PASTEMAC(z,eq0)( *alpha ) )
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize local pointers.
|
||||
x0 = (double*)x;
|
||||
@@ -1057,4 +1084,5 @@ void bli_zaxpyv_zen_int5
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_4)
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2019-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -46,12 +46,18 @@ void bli_scopyv_zen_int
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2)
|
||||
|
||||
const dim_t num_elem_per_reg = 8;
|
||||
__m256 xv[16];
|
||||
dim_t i = 0;
|
||||
|
||||
// If the vector dimension is zero return early.
|
||||
if (bli_zero_dim1(n)) return;
|
||||
if (bli_zero_dim1(n))
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
|
||||
return;
|
||||
}
|
||||
|
||||
if (incx == 1 && incy == 1)
|
||||
{
|
||||
@@ -182,6 +188,7 @@ void bli_scopyv_zen_int
|
||||
y += incy;
|
||||
}
|
||||
}
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -195,12 +202,17 @@ void bli_dcopyv_zen_int
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_2)
|
||||
const dim_t num_elem_per_reg = 4;
|
||||
__m256d xv[16];
|
||||
dim_t i = 0;
|
||||
|
||||
// If the vector dimension is zero return early.
|
||||
if (bli_zero_dim1(n)) return;
|
||||
if (bli_zero_dim1(n))
|
||||
{
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
|
||||
return;
|
||||
}
|
||||
|
||||
if (incx == 1 && incy == 1)
|
||||
{
|
||||
@@ -326,5 +338,6 @@ void bli_dcopyv_zen_int
|
||||
y += incy;
|
||||
}
|
||||
}
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -46,7 +47,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
cntx_t* restrict cntx \
|
||||
) \
|
||||
{ \
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_3) \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) \
|
||||
{ \
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3) \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
@@ -90,6 +97,9 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_3) \
|
||||
\
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC2( copyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2019-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -165,6 +165,32 @@ int main(int argc, char** argv)
|
||||
yp, &incy
|
||||
);
|
||||
}
|
||||
else if (bli_is_scomplex(dt))
|
||||
{
|
||||
f77_int nn = bli_obj_length(&x);
|
||||
f77_int incx = bli_obj_vector_inc(&x);
|
||||
scomplex* xp = bli_obj_buffer(&x);
|
||||
f77_int incy = bli_obj_vector_inc(&y);
|
||||
scomplex* yp = bli_obj_buffer(&y);
|
||||
|
||||
ccopy_( &nn,
|
||||
xp, &incx,
|
||||
yp, &incy
|
||||
);
|
||||
}
|
||||
else if(bli_is_dcomplex(dt))
|
||||
{
|
||||
f77_int nn = bli_obj_length(&x);
|
||||
f77_int incx = bli_obj_vector_inc(&x);
|
||||
dcomplex* xp = bli_obj_buffer(&x);
|
||||
f77_int incy = bli_obj_vector_inc(&y);
|
||||
dcomplex* yp = bli_obj_buffer(&y);
|
||||
|
||||
zcopy_( &nn,
|
||||
xp, &incx,
|
||||
yp, &incy
|
||||
);
|
||||
}
|
||||
#endif
|
||||
dtime_save = bli_clock_min_diff(dtime_save, dtime);
|
||||
#ifdef BLIS_ACCURACY_TEST
|
||||
@@ -199,6 +225,7 @@ int main(int argc, char** argv)
|
||||
if (p >= 10000)
|
||||
p_inc = 10000;
|
||||
Gbps = (n * sizeof_dt) / (dtime_save * 1.0e9);
|
||||
if(bli_is_complex(dt)) Gbps *= 2;
|
||||
#ifdef BLIS
|
||||
printf("data_copyv_blis\t");
|
||||
#else
|
||||
|
||||
262
test/test_ger.c
262
test/test_ger.c
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -46,155 +47,218 @@
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t a, x, y;
|
||||
obj_t a_save;
|
||||
obj_t alpha;
|
||||
dim_t m, n;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, n_input;
|
||||
num_t dt_a, dt_x, dt_y;
|
||||
num_t dt_alpha;
|
||||
int r, n_repeats;
|
||||
obj_t a, x, y;
|
||||
obj_t a_save;
|
||||
obj_t alpha;
|
||||
dim_t m, n;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, n_input;
|
||||
num_t dt_a, dt_x, dt_y, dt;
|
||||
num_t dt_alpha;
|
||||
int r, n_repeats;
|
||||
|
||||
double dtime;
|
||||
double dtime_save;
|
||||
double gflops;
|
||||
double dtime;
|
||||
double dtime_save;
|
||||
double gflops;
|
||||
|
||||
//bli_init();
|
||||
//bli_init();
|
||||
|
||||
n_repeats = 3;
|
||||
n_repeats = 3;
|
||||
|
||||
#ifndef PRINT
|
||||
p_begin = 40;
|
||||
p_end = 2000;
|
||||
p_inc = 40;
|
||||
p_begin = 40;
|
||||
p_end = 4000;
|
||||
p_inc = 40;
|
||||
|
||||
m_input = -1;
|
||||
n_input = -1;
|
||||
m_input = -1;
|
||||
n_input = -1;
|
||||
#else
|
||||
p_begin = 16;
|
||||
p_end = 16;
|
||||
p_inc = 1;
|
||||
p_begin = 16;
|
||||
p_end = 16;
|
||||
p_inc = 1;
|
||||
|
||||
m_input = 15;
|
||||
n_input = 15;
|
||||
m_input = 15;
|
||||
n_input = 15;
|
||||
#endif
|
||||
|
||||
dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE;
|
||||
dt = dt_alpha = dt_x = dt_y = dt_a = BLIS_FLOAT;
|
||||
|
||||
// Begin with initializing the last entry to zero so that
|
||||
// matlab allocates space for the entire array once up-front.
|
||||
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
|
||||
// Begin with initializing the last entry to zero so that
|
||||
// matlab allocates space for the entire array once up-front.
|
||||
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
|
||||
#ifdef BLIS
|
||||
printf( "data_ger_blis" );
|
||||
printf( "data_ger_blis" );
|
||||
#else
|
||||
printf( "data_ger_%s", BLAS );
|
||||
printf( "data_ger_%s", BLAS );
|
||||
#endif
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin)/p_inc + 1,
|
||||
( unsigned long )0,
|
||||
( unsigned long )0, 0.0 );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin)/p_inc + 1,
|
||||
( unsigned long )0,
|
||||
( unsigned long )0, 0.0 );
|
||||
|
||||
//for ( p = p_begin; p <= p_end; p += p_inc )
|
||||
for ( p = p_end; p_begin <= p; p -= p_inc )
|
||||
{
|
||||
//for ( p = p_begin; p <= p_end; p += p_inc )
|
||||
for ( p = p_end; p_begin <= p; p -= p_inc )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
|
||||
else n = ( dim_t ) n_input;
|
||||
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
|
||||
else n = ( dim_t ) n_input;
|
||||
|
||||
|
||||
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
|
||||
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
|
||||
|
||||
bli_obj_create( dt_x, m, 1, 0, 0, &x );
|
||||
bli_obj_create( dt_y, n, 1, 0, 0, &y );
|
||||
bli_obj_create( dt_a, m, n, 0, 0, &a );
|
||||
bli_obj_create( dt_a, m, n, 0, 0, &a_save );
|
||||
bli_obj_create( dt_x, m, 1, 0, 0, &x );
|
||||
bli_obj_create( dt_y, n, 1, 0, 0, &y );
|
||||
bli_obj_create( dt_a, m, n, 0, 0, &a );
|
||||
bli_obj_create( dt_a, m, n, 0, 0, &a_save );
|
||||
|
||||
bli_randm( &x );
|
||||
bli_randm( &y );
|
||||
bli_randm( &a );
|
||||
bli_randm( &x );
|
||||
bli_randm( &y );
|
||||
bli_randm( &a );
|
||||
|
||||
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (0.9/1.0), -1.1, &alpha );
|
||||
|
||||
|
||||
bli_copym( &a, &a_save );
|
||||
|
||||
dtime_save = DBL_MAX;
|
||||
bli_copym( &a, &a_save );
|
||||
|
||||
dtime_save = DBL_MAX;
|
||||
|
||||
for ( r = 0; r < n_repeats; ++r )
|
||||
{
|
||||
bli_copym( &a_save, &a );
|
||||
for ( r = 0; r < n_repeats; ++r )
|
||||
{
|
||||
bli_copym( &a_save, &a );
|
||||
|
||||
|
||||
dtime = bli_clock();
|
||||
dtime = bli_clock();
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "x", &x, "%4.1f", "" );
|
||||
bli_printm( "y", &y, "%4.1f", "" );
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "x", &x, "%4.1f", "" );
|
||||
bli_printm( "y", &y, "%4.1f", "" );
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
#endif
|
||||
|
||||
#ifdef BLIS
|
||||
|
||||
bli_ger( &alpha,
|
||||
&x,
|
||||
&y,
|
||||
&a );
|
||||
bli_ger( &alpha,
|
||||
&x,
|
||||
&y,
|
||||
&a );
|
||||
#else
|
||||
|
||||
f77_int mm = bli_obj_length( &a );
|
||||
f77_int nn = bli_obj_width( &a );
|
||||
f77_int incx = bli_obj_vector_inc( &x );
|
||||
f77_int incy = bli_obj_vector_inc( &y );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* xp = bli_obj_buffer( &x );
|
||||
double* yp = bli_obj_buffer( &y );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
if(bli_is_float(dt))
|
||||
{
|
||||
f77_int mm = bli_obj_length( &a );
|
||||
f77_int nn = bli_obj_width( &a );
|
||||
f77_int incx = bli_obj_vector_inc( &x );
|
||||
f77_int incy = bli_obj_vector_inc( &y );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
float* alphap = bli_obj_buffer( &alpha );
|
||||
float* xp = bli_obj_buffer( &x );
|
||||
float* yp = bli_obj_buffer( &y );
|
||||
float* ap = bli_obj_buffer( &a );
|
||||
|
||||
sger_( &mm,
|
||||
&nn,
|
||||
alphap,
|
||||
xp, &incx,
|
||||
yp, &incy,
|
||||
ap, &lda );
|
||||
}
|
||||
else if(bli_is_double(dt))
|
||||
{
|
||||
f77_int mm = bli_obj_length( &a );
|
||||
f77_int nn = bli_obj_width( &a );
|
||||
f77_int incx = bli_obj_vector_inc( &x );
|
||||
f77_int incy = bli_obj_vector_inc( &y );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
double* alphap = bli_obj_buffer( &alpha );
|
||||
double* xp = bli_obj_buffer( &x );
|
||||
double* yp = bli_obj_buffer( &y );
|
||||
double* ap = bli_obj_buffer( &a );
|
||||
|
||||
dger_( &mm,
|
||||
&nn,
|
||||
alphap,
|
||||
xp, &incx,
|
||||
yp, &incy,
|
||||
ap, &lda );
|
||||
}
|
||||
else if(bli_is_scomplex(dt))
|
||||
{
|
||||
f77_int mm = bli_obj_length( &a );
|
||||
f77_int nn = bli_obj_width( &a );
|
||||
f77_int incx = bli_obj_vector_inc( &x );
|
||||
f77_int incy = bli_obj_vector_inc( &y );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
scomplex* alphap = bli_obj_buffer( &alpha );
|
||||
scomplex* xp = bli_obj_buffer( &x );
|
||||
scomplex* yp = bli_obj_buffer( &y );
|
||||
scomplex* ap = bli_obj_buffer( &a );
|
||||
|
||||
cgeru_( &mm,
|
||||
&nn,
|
||||
alphap,
|
||||
xp, &incx,
|
||||
yp, &incy,
|
||||
ap, &lda );
|
||||
}
|
||||
else if(bli_is_dcomplex(dt))
|
||||
{
|
||||
f77_int mm = bli_obj_length( &a );
|
||||
f77_int nn = bli_obj_width( &a );
|
||||
f77_int incx = bli_obj_vector_inc( &x );
|
||||
f77_int incy = bli_obj_vector_inc( &y );
|
||||
f77_int lda = bli_obj_col_stride( &a );
|
||||
dcomplex* alphap = bli_obj_buffer( &alpha );
|
||||
dcomplex* xp = bli_obj_buffer( &x );
|
||||
dcomplex* yp = bli_obj_buffer( &y );
|
||||
dcomplex* ap = bli_obj_buffer( &a );
|
||||
|
||||
zgeru_( &mm,
|
||||
&nn,
|
||||
alphap,
|
||||
xp, &incx,
|
||||
yp, &incy,
|
||||
ap, &lda );
|
||||
}
|
||||
|
||||
dger_( &mm,
|
||||
&nn,
|
||||
alphap,
|
||||
xp, &incx,
|
||||
yp, &incy,
|
||||
ap, &lda );
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "a after", &a, "%4.1f", "" );
|
||||
exit(1);
|
||||
bli_printm( "a after", &a, "%4.1f", "" );
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 );
|
||||
gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 );
|
||||
|
||||
if(bli_is_complex(dt)) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_ger_blis" );
|
||||
printf( "data_ger_blis" );
|
||||
#else
|
||||
printf( "data_ger_%s", BLAS );
|
||||
printf( "data_ger_%s", BLAS );
|
||||
#endif
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )n, gflops );
|
||||
printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
|
||||
( unsigned long )(p - p_begin)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )n, gflops );
|
||||
|
||||
bli_obj_free( &alpha );
|
||||
bli_obj_free( &alpha );
|
||||
|
||||
bli_obj_free( &x );
|
||||
bli_obj_free( &y );
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &a_save );
|
||||
}
|
||||
bli_obj_free( &x );
|
||||
bli_obj_free( &y );
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &a_save );
|
||||
}
|
||||
|
||||
//bli_finalize();
|
||||
//bli_finalize();
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user