diff --git a/CMakeLists.txt b/CMakeLists.txt index 8572aad64..3fa559abc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,10 +90,16 @@ option(BLIS_ENABLE_ILP64 "ENABLE BLIS ILP64" OFF) option(ENABLE_INT_TYPE_SIZE " Internal BLIS integers ,used in native BLIS interfaces based on architecture dependent " ON) option(ENABLE_BLASTEST "Enable the blastest" OFF) option(ENABLE_TESTCPP_TESTING "Enabling testcpp" OFF) +option (ENABLE_NO_UNDERSCORE_API "export APIs without underscore" ON) +option (ENABLE_UPPERCASE_API "export APIs with uppercase" OFF) +option (ENABLE_API_WRAPPER "Enable wrapper code" OFF) option (ENABLE_COMPLEX_RETURN_INTEL "Enable complex_return_intel" OFF) option (ENABLE_TRSM_PREINVERSION "Enable TRSM preinversion" ON) option (ENABLE_AOCL_DYNAMIC "Enable Dynamic Multi-threading" OFF) +if(ENABLE_NO_UNDERSCORE_API) + add_definitions(-DBLIS_ENABLE_NO_UNDERSCORE_API) +endif() if(ENABLE_COMPLEX_RETURN_INTEL) set(BLIS_ENABLE_COMPLEX_RETURN_INTEL TRUE) @@ -101,6 +107,14 @@ else() set(BLIS_DISABLE_COMPLEX_RETURN_INTEL TRUE) endif() +if(ENABLE_UPPERCASE_API) + add_definitions(-DBLIS_ENABLE_UPPERCASE_API) +endif() + +if(ENABLE_API_WRAPPER) + add_definitions(-DBLIS_ENABLE_API_WRAPPER) +endif() + if(ENABLE_AOCL_DYNAMIC) set(AOCL_DYNAMIC TRUE) endif() diff --git a/frame/compat/cblas/src/cblas_f77.h b/frame/compat/cblas/src/cblas_f77.h index b09963eec..fabf3efb1 100644 --- a/frame/compat/cblas/src/cblas_f77.h +++ b/frame/compat/cblas/src/cblas_f77.h @@ -14,6 +14,195 @@ #ifndef CBLAS_F77_H #define CBLAS_F77_H +#if defined(BLIS_ENABLE_NO_UNDERSCORE_API) + /* + * Level 1 BLAS + */ +#define F77_xerbla xerbla +#define F77_srotg srotg +#define F77_srotmg srotmg +#define F77_srot srot +#define F77_srotm srotm +#define F77_drotg drotg +#define F77_drotmg drotmg +#define F77_drot drot +#define F77_drotm drotm +#define F77_sswap sswap +#define F77_scopy scopy +#define F77_saxpy saxpy +#define F77_isamax_sub isamaxsub +#define F77_dswap dswap +#define F77_dcopy dcopy +#define 
F77_daxpy daxpy +#define F77_idamax_sub idamaxsub +#define F77_cswap cswap +#define F77_ccopy ccopy +#define F77_caxpy caxpy +#define F77_icamax_sub icamaxsub +#define F77_zswap zswap +#define F77_zcopy zcopy +#define F77_zaxpy zaxpy +#define F77_zaxpby zaxpby +#define F77_izamax_sub izamaxsub +#define F77_sdot_sub sdotsub +#define F77_ddot_sub ddotsub +#define F77_dsdot_sub dsdotsub +#define F77_sscal sscal +#define F77_dscal dscal +#define F77_cscal cscal +#define F77_zscal zscal +#define F77_csscal csscal +#define F77_zdscal zdscal +#define F77_cdotu_sub cdotusub +#define F77_cdotc_sub cdotcsub +#define F77_zdotu_sub zdotusub +#define F77_zdotc_sub zdotcsub +#define F77_snrm2_sub snrm2sub +#define F77_sasum_sub sasumsub +#define F77_dnrm2_sub dnrm2sub +#define F77_dasum_sub dasumsub +#define F77_scnrm2_sub scnrm2sub +#define F77_scasum_sub scasumsub +#define F77_dznrm2_sub dznrm2sub +#define F77_dzasum_sub dzasumsub +#define F77_sdsdot_sub sdsdotsub +/* +* Level 2 BLAS +*/ +#define F77_ssymv ssymv +#define F77_ssbmv ssbmv +#define F77_sspmv sspmv +#define F77_sger sger +#define F77_ssyr ssyr +#define F77_sspr sspr +#define F77_ssyr2 ssyr2 +#define F77_sspr2 sspr2 +#define F77_dsymv dsymv +#define F77_dsbmv dsbmv +#define F77_dspmv dspmv +#define F77_dger dger +#define F77_dsyr dsyr +#define F77_dspr dspr +#define F77_dsyr2 dsyr2 +#define F77_dspr2 dspr2 +#define F77_chemv chemv +#define F77_chbmv chbmv +#define F77_chpmv chpmv +#define F77_cgeru cgeru +#define F77_cgerc cgerc +#define F77_cher cher +#define F77_chpr chpr +#define F77_cher2 cher2 +#define F77_chpr2 chpr2 +#define F77_zhemv zhemv +#define F77_zhbmv zhbmv +#define F77_zhpmv zhpmv +#define F77_zgeru zgeru +#define F77_zgerc zgerc +#define F77_zher zher +#define F77_zhpr zhpr +#define F77_zher2 zher2 +#define F77_zhpr2 zhpr2 +#define F77_sgemv sgemv +#define F77_sgbmv sgbmv +#define F77_strmv strmv +#define F77_stbmv stbmv +#define F77_stpmv stpmv +#define F77_strsv strsv +#define F77_stbsv stbsv 
+#define F77_stpsv stpsv +#define F77_dgemv dgemv +#define F77_dgbmv dgbmv +#define F77_dtrmv dtrmv +#define F77_dtbmv dtbmv +#define F77_dtpmv dtpmv +#define F77_dtrsv dtrsv +#define F77_dtbsv dtbsv +#define F77_dtpsv dtpsv +#define F77_cgemv cgemv +#define F77_cgbmv cgbmv +#define F77_ctrmv ctrmv +#define F77_ctbmv ctbmv +#define F77_ctpmv ctpmv +#define F77_ctrsv ctrsv +#define F77_ctbsv ctbsv +#define F77_ctpsv ctpsv +#define F77_zgemv zgemv +#define F77_zgbmv zgbmv +#define F77_ztrmv ztrmv +#define F77_ztbmv ztbmv +#define F77_ztpmv ztpmv +#define F77_ztrsv ztrsv +#define F77_ztbsv ztbsv +#define F77_ztpsv ztpsv +/* +* Level 3 BLAS +*/ +#define F77_chemm chemm +#define F77_cherk cherk +#define F77_cher2k cher2k +#define F77_zhemm zhemm +#define F77_zherk zherk +#define F77_zher2k zher2k +#define F77_sgemm sgemm +#define F77_ssymm ssymm +#define F77_ssyrk ssyrk +#define F77_ssyr2k ssyr2k +#define F77_strmm strmm +#define F77_strsm strsm +#define F77_dgemm dgemm +#define F77_dsymm dsymm +#define F77_dsyrk dsyrk +#define F77_dsyr2k dsyr2k +#define F77_dtrmm dtrmm +#define F77_dtrsm dtrsm +#define F77_cgemm cgemm +#define F77_csymm csymm +#define F77_csyrk csyrk +#define F77_csyr2k csyr2k +#define F77_ctrmm ctrmm +#define F77_ctrsm ctrsm +#define F77_zgemm zgemm +#define F77_zsymm zsymm +#define F77_zsyrk zsyrk +#define F77_zsyr2k zsyr2k +#define F77_ztrmm ztrmm +#define F77_ztrsm ztrsm +#define F77_dgemmt dgemmt +#define F77_sgemmt sgemmt +#define F77_cgemmt cgemmt +#define F77_zgemmt zgemmt + +/* +* Aux Function +*/ +#define F77_scabs1 scabs1 +#define F77_dcabs1 dcabs1 + +/* + * -- BLAS Extension APIs -- + */ + +#define F77_saxpby saxpby +#define F77_daxpby daxpby +#define F77_caxpby caxpby +#define F77_zaxpby zaxpby +#define F77_cgemm3m cgemm3m +#define F77_zgemm3m zgemm3m + +#define F77_isamin_sub isaminsub +#define F77_idamin_sub idaminsub +#define F77_icamin_sub icaminsub +#define F77_izamin_sub izaminsub + +// -- Batch APIs -- +#define F77_sgemm_batch 
sgemm_batch +#define F77_dgemm_batch dgemm_batch +#define F77_cgemm_batch cgemm_batch +#define F77_zgemm_batch zgemm_batch + +// (BLIS_ENABLE_NO_UNDERSCORE_API) ends +#else /* * Level 1 BLAS */ @@ -201,4 +390,4 @@ #define F77_zgemm_batch zgemm_batch_ #endif -/* CBLAS_F77_H */ +#endif /* CBLAS_F77_H */ diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index 980859039..f29fdc1fe 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -6,7 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018-2021, Advanced Micro Devices, Inc. All rights reserved. - + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -156,12 +156,18 @@ #define STRINGIFY_INT( s ) MKSTR( s ) #define PASTEMACT(ch1, ch2, ch3, ch4) bli_ ## ch1 ## ch2 ## _ ## ch3 ## _ ## ch4 - +// name-mangling macros. +#ifdef BLIS_ENABLE_NO_UNDERSCORE_API +#define PASTEF770(name) name +#define PASTEF77(ch1,name) ch1 ## name +#define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name +#define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name +#else #define PASTEF770(name) name ## _ #define PASTEF77(ch1,name) ch1 ## name ## _ #define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name ## _ #define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name ## _ - +#endif // -- Include other groups of macros @@ -181,4 +187,345 @@ #include "bli_oapi_macro_defs.h" #include "bli_tapi_macro_defs.h" + +#ifdef BLIS_ENABLE_NO_UNDERSCORE_API +#define isamax_ isamax +#define idamax_ idamax +#define icamax_ icamax +#define izamax_ izamax +#define sasum_ sasum +#define dasum_ dasum +#define scasum_ scasum +#define dzasum_ dzasum +#define saxpy_ saxpy +#define daxpy_ daxpy +#define caxpy_ caxpy +#define zaxpy_ zaxpy +#define scopy_ scopy +#define dcopy_ dcopy +#define ccopy_ ccopy +#define zcopy_ zcopy +#define sdot_ sdot +#define ddot_ ddot +#define 
cdotc_ cdotc +#define zdotc_ zdotc +#define cdotu_ cdotu +#define zdotu_ zdotu +#define snrm2_ snrm2 +#define dnrm2_ dnrm2 +#define scnrm2_ scnrm2 +#define dznrm2_ dznrm2 +#define sscal_ sscal +#define dscal_ dscal +#define cscal_ cscal +#define csscal_ csscal +#define zscal_ zscal +#define zdscal_ zdscal +#define sswap_ sswap +#define dswap_ dswap +#define cswap_ cswap +#define zswap_ zswap +#define sgemv_ sgemv +#define dgemv_ dgemv +#define cgemv_ cgemv +#define zgemv_ zgemv +#define sger_ sger +#define dger_ dger +#define cgerc_ cgerc +#define cgeru_ cgeru +#define zgerc_ zgerc +#define zgeru_ zgeru +#define chemv_ chemv +#define zhemv_ zhemv +#define cher_ cher +#define zher_ zher +#define cher2_ cher2 +#define zher2_ zher2 +#define ssymv_ ssymv +#define dsymv_ dsymv +#define csymm_ csymm +#define zsymm_ zsymm +#define ssyr_ ssyr +#define dsyr_ dsyr +#define csyrk_ csyrk +#define zsyrk_ zsyrk +#define ssyr2_ ssyr2 +#define dsyr2_ dsyr2 +#define csyr2k_ csyr2k +#define zsyr2k_ zsyr2k +#define strmv_ strmv +#define dtrmv_ dtrmv +#define ctrmv_ ctrmv +#define ztrmv_ ztrmv +#define strsv_ strsv +#define dtrsv_ dtrsv +#define ctrsv_ ctrsv +#define ztrsv_ ztrsv +#define sgemm_ sgemm +#define dgemm_ dgemm +#define cgemm_ cgemm +#define zgemm_ zgemm +#define chemm_ chemm +#define zhemm_ zhemm +#define dgemmt_ dgemmt +#define sgemmt_ sgemmt +#define zgemmt_ zgemmt +#define cgemmt_ cgemmt +#define sgemm_batch_ sgemm_batch +#define dgemm_batch_ dgemm_batch +#define cgemm_batch_ cgemm_batch +#define zgemm_batch_ zgemm_batch +#define saxpby_ saxpby +#define daxpby_ daxpby +#define caxpby_ caxpby +#define zaxpby_ zaxpby +#define cher2k_ cher2k +#define zher2k_ zher2k +#define cherk_ cherk +#define zherk_ zherk +#define ssymm_ ssymm +#define dsymm_ dsymm +#define ssyr2k_ ssyr2k +#define dsyr2k_ dsyr2k +#define ssyrk_ ssyrk +#define dsyrk_ dsyrk +#define strmm_ strmm +#define dtrmm_ dtrmm +#define ctrmm_ ctrmm +#define ztrmm_ ztrmm +#define strsm_ strsm 
+#define dtrsm_ dtrsm +#define ctrsm_ ctrsm +#define ztrsm_ ztrsm +#define lsame_ lsame +#define cimatcopy_ cimatcopy +#define comatadd_ comatadd +#define comatcopy2_ comatcopy2 +#define comatcopy_ comatcopy +#define dimatcopy_ dimatcopy +#define domatadd_ domatadd +#define domatcopy2_ domatcopy2 +#define domatcopy_ domatcopy +#define simatcopy_ simatcopy +#define somatadd_ somatadd +#define somatcopy2_ somatcopy2 +#define somatcopy_ somatcopy +#define zimatcopy_ zimatcopy +#define zomatadd_ zomatadd +#define zomatcopy2_ zomatcopy2 +#define zomatcopy_ zomatcopy #endif + +#ifdef BLIS_ENABLE_UPPERCASE_API +#define caxpby CAXPBY +#define caxpy CAXPY +#define ccopy CCOPY +#define cdotc CDOTC +#define cdotcsub CDOTCSUB +#define cdotu CDOTU +#define cdotusub CDOTUSUB +#define cgbmv CGBMV +#define cgemm CGEMM +#define cgemm3m CGEMM3M +#define cgemm_batch CGEMM_BATCH +#define cgemmt CGEMMT +#define cgemv CGEMV +#define cgerc CGERC +#define cgeru CGERU +#define chbmv CHBMV +#define chemm CHEMM +#define chemv CHEMV +#define cher CHER +#define cher2 CHER2 +#define cher2k CHER2K +#define cherk CHERK +#define chpmv CHPMV +#define chpr CHPR +#define chpr2 CHPR2 +#define cimatcopy CIMATCOPY +#define comatadd COMATADD +#define comatcopy2 COMATCOPY2 +#define comatcopy COMATCOPY +#define crotg CROTG +#define cscal CSCAL +#define csrot CSROT +#define csscal CSSCAL +#define cswap CSWAP +#define csymm CSYMM +#define csyr2k CSYR2K +#define csyrk CSYRK +#define ctbmv CTBMV +#define ctbsv CTBSV +#define ctpmv CTPMV +#define ctpsv CTPSV +#define ctrmm CTRMM +#define ctrmv CTRMV +#define ctrsm CTRSM +#define ctrsv CTRSV +#define dasum DASUM +#define dasumsub DASUMSUB +#define daxpby DAXPBY +#define daxpy DAXPY +#define dcabs1 DCABS1 +#define dcopy DCOPY +#define ddot DDOT +#define ddotsub DDOTSUB +#define dgbmv DGBMV +#define dgemm DGEMM +#define dgemm_batch DGEMM_BATCH +#define dgemmt DGEMMT +#define dgemv DGEMV +#define dger DGER +#define dnrm2 DNRM2 +#define dnrm2sub DNRM2SUB +#define 
dimatcopy DIMATCOPY +#define domatadd DOMATADD +#define domatcopy2 DOMATCOPY2 +#define domatcopy DOMATCOPY +#define drot DROT +#define drotg DROTG +#define drotm DROTM +#define drotmg DROTMG +#define dsbmv DSBMV +#define dscal DSCAL +#define dsdot DSDOT +#define dsdotsub DSDOTSUB +#define dspmv DSPMV +#define dspr DSPR +#define dspr2 DSPR2 +#define dswap DSWAP +#define dsymm DSYMM +#define dsymv DSYMV +#define dsyr DSYR +#define dsyr2 DSYR2 +#define dsyr2k DSYR2K +#define dsyrk DSYRK +#define dtbmv DTBMV +#define dtbsv DTBSV +#define dtpmv DTPMV +#define dtpsv DTPSV +#define dtrmm DTRMM +#define dtrmv DTRMV +#define dtrsm DTRSM +#define dtrsv DTRSV +#define dzasum DZASUM +#define dzasumsub DZASUMSUB +#define dznrm2 DZNRM2 +#define dznrm2sub DZNRM2SUB +#define icamax ICAMAX +#define icamaxsub ICAMAXSUB +#define icamin ICAMIN +#define icaminsub ICAMINSUB +#define idamax IDAMAX +#define idamaxsub IDAMAXSUB +#define idamin IDAMIN +#define idaminsub IDAMINSUB +#define isamax ISAMAX +#define isamaxsub ISAMAXSUB +#define isamin ISAMIN +#define isaminsub ISAMINSUB +#define izamax IZAMAX +#define izamaxsub IZAMAXSUB +#define izamin IZAMIN +#define izaminsub IZAMINSUB +#define lsame LSAME +#define sasum SASUM +#define sasumsub SASUMSUB +#define saxpby SAXPBY +#define saxpy SAXPY +#define scabs1 SCABS1 +#define scasum SCASUM +#define scasumsub SCASUMSUB +#define scnrm2 SCNRM2 +#define scnrm2sub SCNRM2SUB +#define scopy SCOPY +#define sdot SDOT +#define sdotsub SDOTSUB +#define sdsdot SDSDOT +#define sdsdotsub SDSDOTSUB +#define sgbmv SGBMV +#define sgemm SGEMM +#define sgemm_batch SGEMM_BATCH +#define sgemmt SGEMMT +#define sgemv SGEMV +#define sger SGER +#define snrm2 SNRM2 +#define snrm2sub SNRM2SUB +#define simatcopy SIMATCOPY +#define somatadd SOMATADD +#define somatcopy2 SOMATCOPY2 +#define somatcopy SOMATCOPY +#define srot SROT +#define srotg SROTG +#define srotm SROTM +#define srotmg SROTMG +#define ssbmv SSBMV +#define sscal SSCAL +#define sspmv SSPMV +#define sspr 
SSPR +#define sspr2 SSPR2 +#define sswap SSWAP +#define ssymm SSYMM +#define ssymv SSYMV +#define ssyr SSYR +#define ssyr2 SSYR2 +#define ssyr2k SSYR2K +#define ssyrk SSYRK +#define stbmv STBMV +#define stbsv STBSV +#define stpmv STPMV +#define stpsv STPSV +#define strmm STRMM +#define strmv STRMV +#define strsm STRSM +#define strsv STRSV +#define xerbla XERBLA +#define zaxpby ZAXPBY +#define zaxpy ZAXPY +#define zcopy ZCOPY +#define zdotc ZDOTC +#define zdotcsub ZDOTCSUB +#define zdotu ZDOTU +#define zdotusub ZDOTUSUB +#define zdrot ZDROT +#define zdscal ZDSCAL +#define zgbmv ZGBMV +#define zgemm ZGEMM +#define zgemm3m ZGEMM3M +#define zgemm_batch ZGEMM_BATCH +#define zgemmt ZGEMMT +#define zgemv ZGEMV +#define zgerc ZGERC +#define zgeru ZGERU +#define zhbmv ZHBMV +#define zhemm ZHEMM +#define zhemv ZHEMV +#define zher ZHER +#define zher2 ZHER2 +#define zher2k ZHER2K +#define zherk ZHERK +#define zhpmv ZHPMV +#define zhpr ZHPR +#define zhpr2 ZHPR2 +#define zimatcopy ZIMATCOPY +#define zomatadd ZOMATADD +#define zomatcopy2 ZOMATCOPY2 +#define zomatcopy ZOMATCOPY +#define zrotg ZROTG +#define zscal ZSCAL +#define zswap ZSWAP +#define zsymm ZSYMM +#define zsyr2k ZSYR2K +#define zsyrk ZSYRK +#define ztbmv ZTBMV +#define ztbsv ZTBSV +#define ztpmv ZTPMV +#define ztpsv ZTPSV +#define ztrmm ZTRMM +#define ztrmv ZTRMV +#define ztrsm ZTRSM +#define ztrsv ZTRSV +#endif + +#endif + diff --git a/frame/util/bli_util_api_wrap.c b/frame/util/bli_util_api_wrap.c index 393a56e14..128fba8b8 100644 --- a/frame/util/bli_util_api_wrap.c +++ b/frame/util/bli_util_api_wrap.c @@ -32,11 +32,14 @@ */ +// This file defines additional spellings of the BLAS APIs: uppercase with +// and without a trailing underscore, and lowercase without an underscore. 
+ #include "blis.h" #include "bli_util_api_wrap.h" // wrapper functions to support additional symbols - +#if defined(BLIS_ENABLE_API_WRAPPER) && !defined(BLIS_ENABLE_NO_UNDERSCORE_API) && !defined(BLIS_ENABLE_UPPERCASE_API) /* the renaming macros in bli_macro_defs.h (e.g. caxpby_ -> caxpby -> CAXPBY) would make these wrappers redefine and call themselves, so skip them when either renaming mode is on */ void CAXPY(const f77_int *n,const scomplex *ca,const scomplex *cx,const f77_int *incx,scomplex *cy,const f77_int *incy) { caxpy_( n, ca, cx, incx, cy, incy); @@ -3215,4 +3218,6 @@ void caxpby( const f77_int* n, const scomplex* alpha, const scomplex *x, cons void CAXPBY_( const f77_int* n, const scomplex* alpha, const scomplex *x, const f77_int* incx, const scomplex* beta, scomplex *y, const f77_int* incy) { caxpby_(n, alpha, x, incx, beta, y, incy); -} \ No newline at end of file +} + +#endif /* BLIS_ENABLE_API_WRAPPER without renaming macros */ diff --git a/frame/util/bli_util_api_wrap.h b/frame/util/bli_util_api_wrap.h index 46d5a636a..f0aff49ff 100644 --- a/frame/util/bli_util_api_wrap.h +++ b/frame/util/bli_util_api_wrap.h @@ -32,6 +32,10 @@ */ +// This file declares additional spellings of the BLAS APIs: uppercase with +// and without a trailing underscore, and lowercase without an underscore. + +#if defined(BLIS_ENABLE_API_WRAPPER) && !defined(BLIS_ENABLE_NO_UNDERSCORE_API) && !defined(BLIS_ENABLE_UPPERCASE_API) /* declared only when the renaming macros in bli_macro_defs.h are off; those macros rewrite these names onto each other */ //Level 1 APIs BLIS_EXPORT_BLIS void SROTG(float *sa, float *sb, float *c, float *s); @@ -1724,4 +1728,4 @@ BLIS_EXPORT_BLIS void zomatcopy(f77_char* trans, f77_int* rows, f77_int* cols, BLIS_EXPORT_BLIS void ZOMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb); - +#endif /* BLIS_ENABLE_API_WRAPPER without renaming macros */