diff --git a/CMakeLists.txt b/CMakeLists.txt index de5b828a2..452452d1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.## +##Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.## cmake_minimum_required(VERSION 3.0.0) @@ -88,7 +88,7 @@ option(BLIS_ENABLE_ILP64 "ENABLE BLIS ILP64" OFF) option(ENABLE_INT_TYPE_SIZE "Integer size type" OFF) option(ENABLE_BLASTEST "Enable the blastest" OFF) option(ENABLE_TESTCPP_TESTING "Enabling testcpp" OFF) -option (ENABLE_NO_UNDERSCORE_API "export APIs without underscore" OFF) +option (ENABLE_NO_UNDERSCORE_API "export APIs without underscore" ON) option (ENABLE_UPPERCASE "export APIs with uppercase" OFF) if(ENABLE_NO_UNDERSCORE_API) diff --git a/build/bli_win_config.h.in b/build/bli_win_config.h.in index 1aaa182c6..0f6c63a74 100644 --- a/build/bli_win_config.h.in +++ b/build/bli_win_config.h.in @@ -9,8 +9,6 @@ #cmakedefine BLIS_ENABLE_OPENMP -#cmakedefine BLIS_ENABLE_PTHREADS - #cmakedefine BLIS_ENABLE_JRIR_SLAB #cmakedefine BLIS_ENABLE_JRIR_RR diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index 77bc14a37..b08a76520 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -157,10 +157,17 @@ #define PASTEMACT(ch1, ch2, ch3, ch4) bli_ ## ch1 ## ch2 ## _ ## ch3 ## _ ## ch4 // Fortran-77 name-mangling macros. +#ifdef BLIS_ENABLE_NO_UNDERSCORE_API +#define PASTEF770(name) name +#define PASTEF77(ch1,name) ch1 ## name +#define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name +#define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name +#else #define PASTEF770(name) name ## _ #define PASTEF77(ch1,name) ch1 ## name ## _ #define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name ## _ #define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name ## _ +#endif // -- Include other groups of macros @@ -181,5 +188,328 @@ #include "bli_oapi_macro_defs.h" #include "bli_tapi_macro_defs.h" +#ifdef BLIS_ENABLE_NO_UNDERSCORE_API +#define isamax_ isamax +#define idamax_ idamax +#define icamax_ icamax +#define izamax_ izamax +#define sasum_ sasum +#define dasum_ dasum +#define scasum_ scasum +#define dzasum_ dzasum +#define saxpy_ saxpy +#define daxpy_ daxpy +#define caxpy_ caxpy +#define zaxpy_ zaxpy +#define scopy_ scopy +#define dcopy_ dcopy +#define ccopy_ ccopy +#define zcopy_ zcopy +#define sdot_ sdot +#define ddot_ ddot +#define snrm2_ snrm2 +#define dnrm2_ dnrm2 +#define scnrm2_ scnrm2 +#define dznrm2_ dznrm2 +#define sscal_ sscal +#define dscal_ dscal +#define cscal_ cscal +#define csscal_ csscal +#define zscal_ zscal +#define zdscal_ zdscal +#define sswap_ sswap +#define dswap_ dswap +#define cswap_ cswap +#define zswap_ zswap +#define sgemv_ sgemv +#define dgemv_ dgemv +#define cgemv_ cgemv +#define zgemv_ zgemv +#define sger_ sger +#define dger_ dger +#define cgerc_ cgerc +#define cgeru_ cgeru +#define zgerc_ zgerc +#define zgeru_ zgeru +#define chemv_ chemv +#define zhemv_ zhemv +#define cher_ cher +#define zher_ zher +#define cher2_ cher2 +#define zher2_ zher2 +#define ssymv_ ssymv +#define dsymv_ dsymv +#define csymm_ csymm +#define zsymm_ zsymm +#define ssyr_ ssyr +#define dsyr_ dsyr +#define csyrk_ csyrk +#define csyrk_ csyrk +#define zsyrk_ zsyrk +#define ssyr2_ ssyr2 +#define dsyr2_ dsyr2 +#define csyr2k_ csyr2k +#define zsyr2k_ zsyr2k +#define strmv_ strmv +#define dtrmv_ dtrmv +#define ctrmv_ ctrmv +#define ztrmv_ ztrmv +#define strsv_ strsv +#define dtrsv_ dtrsv +#define ctrsv_ ctrsv +#define ztrsv_ ztrsv +#define sgemm_ sgemm +#define dgemm_ dgemm +#define cgemm_ cgemm +#define zgemm_ zgemm +#define chemm_ chemm +#define zhemm_ zhemm +#define cher2k_ cher2k +#define zher2k_ zher2k +#define cherk_ cherk +#define zherk_ zherk +#define ssymm_ ssymm +#define dsymm_ dsymm +#define ssyr2k_ ssyr2k +#define dsyr2k_ dsyr2k +#define ssyrk_ ssyrk +#define dsyrk_ dsyrk +#define strmm_ strmm +#define dtrmm_ dtrmm +#define ctrmm_ ctrmm +#define ztrmm_ ztrmm +#define strsm_ strsm +#define dtrsm_ dtrsm +#define ctrsm_ ctrsm +#define ztrsm_ ztrsm +#define lsame_ lsame +#define cimatcopy_ cimatcopy +#define comatadd_ comatadd +#define comatcopy2_ comatcopy2 +#define comatcopy_ comatcopy +#define dimatcopy_ dimatcopy +#define domatadd_ domatadd +#define domatcopy2_ domatcopy2 +#define domatcopy_ domatcopy +#define simatcopy_ simatcopy +#define somatadd_ somatadd +#define somatcopy2_ somatcopy2 +#define somatcopy_ somatcopy +#define zimatcopy_ zimatcopy +#define zomatadd_ zomatadd +#define zomatcopy2_ zomatcopy2 +#define zomatcopy_ zomatcopy +#endif + +#ifdef BLIS_ENABLE_UPPERCASE +#define caxpby CAXPBY +#define caxpy CAXPY +#define ccopy CCOPY +#define cdotc CDOTC +#define cdotcsub CDOTCSUB +#define cdotu CDOTU +#define cdotusub CDOTUSUB +#define cgbmv CGBMV +#define cgemm CGEMM +#define cgemm3m CGEMM3M +#define cgemm_batch CGEMM_BATCH +#define cgemmt CGEMMT +#define cgemv CGEMV +#define cgerc CGERC +#define cgeru CGERU +#define chbmv CHBMV +#define chemm CHEMM +#define chemv CHEMV +#define cher CHER +#define cher2 CHER2 +#define cher2k CHER2K +#define cherk CHERK +#define chpmv CHPMV +#define chpr CHPR +#define chpr2 CHPR2 +#define cimatcopy CIMATCOPY +#define comatadd COMATADD +#define comatcopy2 COMATCOPY2 +#define comatcopy COMATCOPY +#define crotg CROTG +#define cscal CSCAL +#define csrot CSROT +#define csscal CSSCAL +#define cswap CSWAP +#define csymm CSYMM +#define csyr2k CSYR2K +#define csyrk CSYRK +#define ctbmv CTBMV +#define ctbsv CTBSV +#define ctpmv CTPMV +#define ctpsv CTPSV +#define ctrmm CTRMM +#define ctrmv CTRMV +#define ctrsm CTRSM +#define ctrsv CTRSV +#define dasum DASUM +#define dasumsub DASUMSUB +#define daxpby DAXPBY +#define daxpy DAXPY +#define dcabs1 DCABS1 +#define dcopy DCOPY +#define ddot DDOT +#define ddotsub DDOTSUB +#define dgbmv DGBMV +#define dgemm DGEMM +#define dgemm_batch DGEMM_BATCH +#define dgemmt DGEMMT +#define dgemv DGEMV +#define dger DGER +#define dnrm2 DNRM2 +#define dnrm2sub DNRM2SUB +#define dimatcopy DIMATCOPY +#define domatadd DOMATADD +#define domatcopy2 DOMATCOPY2 +#define domatcopy DOMATCOPY +#define drot DROT +#define drotg DROTG +#define drotm DROTM +#define drotmg DROTMG +#define dsbmv DSBMV +#define dscal DSCAL +#define dsdot DSDOT +#define dsdotsub DSDOTSUB +#define dspmv DSPMV +#define dspr DSPR +#define dspr2 DSPR2 +#define dswap DSWAP +#define dsymm DSYMM +#define dsymv DSYMV +#define dsyr DSYR +#define dsyr2 DSYR2 +#define dsyr2k DSYR2K +#define dsyrk DSYRK +#define dtbmv DTBMV +#define dtbsv DTBSV +#define dtpmv DTPMV +#define dtpsv DTPSV +#define dtrmm DTRMM +#define dtrmv DTRMV +#define dtrsm DTRSM +#define dtrsv DTRSV +#define dzasum DZASUM +#define dzasumsub DZASUMSUB +#define dznrm2 DZNRM2 +#define dznrm2sub DZNRM2SUB +#define icamax ICAMAX +#define icamaxsub ICAMAXSUB +#define icamin ICAMIN +#define icaminsub ICAMINSUB +#define idamax IDAMAX +#define idamaxsub IDAMAXSUB +#define idamin IDAMIN +#define idaminsub IDAMINSUB +#define isamax ISAMAX +#define isamaxsub ISAMAXSUB +#define isamin ISAMIN +#define isaminsub ISAMINSUB +#define izamax IZAMAX +#define izamaxsub IZAMAXSUB +#define izamin IZAMIN +#define izaminsub IZAMINSUB +#define lsame LSAME +#define sasum SASUM +#define sasumsub SASUMSUB +#define saxpby SAXPBY +#define saxpy SAXPY +#define scabs1 SCABS1 +#define scasum SCASUM +#define scasumsub SCASUMSUB +#define scnrm2 SCNRM2 +#define scnrm2sub SCNRM2SUB +#define scopy SCOPY +#define sdot SDOT +#define sdotsub SDOTSUB +#define sdsdot SDSDOT +#define sdsdotsub SDSDOTSUB +#define sgbmv SGBMV +#define sgemm SGEMM +#define sgemm_batch SGEMM_BATCH +#define sgemmt SGEMMT +#define sgemv SGEMV +#define sger SGER +#define snrm2 SNRM2 +#define snrm2sub SNRM2SUB +#define simatcopy SIMATCOPY +#define somatadd SOMATADD +#define somatcopy2 SOMATCOPY2 +#define somatcopy SOMATCOPY +#define srot SROT +#define srotg SROTG +#define srotm SROTM +#define srotmg SROTMG +#define ssbmv SSBMV +#define sscal SSCAL +#define sspmv SSPMV +#define sspr SSPR +#define sspr2 SSPR2 +#define sswap SSWAP +#define ssymm SSYMM +#define ssymv SSYMV +#define ssyr SSYR +#define ssyr2 SSYR2 +#define ssyr2k SSYR2K +#define ssyrk SSYRK +#define stbmv STBMV +#define stbsv STBSV +#define stpmv STPMV +#define stpsv STPSV +#define strmm STRMM +#define strmv STRMV +#define strsm STRSM +#define strsv STRSV +#define xerbla XERBLA +#define zaxpby ZAXPBY +#define zaxpy ZAXPY +#define zcopy ZCOPY +#define zdotc ZDOTC +#define zdotcsub ZDOTCSUB +#define zdotu ZDOTU +#define zdotusub ZDOTUSUB +#define zdrot ZDROT +#define zdscal ZDSCAL +#define zgbmv ZGBMV +#define zgemm ZGEMM +#define zgemm3m ZGEMM3M +#define zgemm_batch ZGEMM_BATCH +#define zgemmt ZGEMMT +#define zgemv ZGEMV +#define zgerc ZGERC +#define zgeru ZGERU +#define zhbmv ZHBMV +#define zhemm ZHEMM +#define zhemv ZHEMV +#define zher ZHER +#define zher2 ZHER2 +#define zher2k ZHER2K +#define zherk ZHERK +#define zhpmv ZHPMV +#define zhpr ZHPR +#define zhpr2 ZHPR2 +#define zimatcopy ZIMATCOPY +#define zomatadd ZOMATADD +#define zomatcopy2 ZOMATCOPY2 +#define zomatcopy ZOMATCOPY +#define zrotg ZROTG +#define zscal ZSCAL +#define zswap ZSWAP +#define zsymm ZSYMM +#define zsyr2k ZSYR2K +#define zsyrk ZSYRK +#define ztbmv ZTBMV +#define ztbsv ZTBSV +#define ztpmv ZTPMV +#define ztpsv ZTPSV +#define ztrmm ZTRMM +#define ztrmv ZTRMV +#define ztrsm ZTRSM +#define ztrsv ZTRSV +#endif #endif diff --git a/frame/include/bli_system.h b/frame/include/bli_system.h index 96304f7e0..b614b287d 100644 --- a/frame/include/bli_system.h +++ b/frame/include/bli_system.h @@ -71,6 +71,7 @@ // Determine the target operating system. #if defined(_WIN32) || defined(__CYGWIN__) + #define BLIS #define BLIS_OS_WINDOWS 1 #elif defined(__gnu_hurd__) #define BLIS_OS_GNU 1 diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 33ece6e9b..e6c94b83f 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -89,8 +89,10 @@ typedef unsigned long int guint_t; // -- Boolean type -- // NOTE: bool_t is no longer used and has been replaced with C99's bool type. -//typedef bool bool_t; - +#ifdef _WIN32 +#undef bool +typedef gint_t bool; +#endif // BLIS uses TRUE and FALSE macro constants as possible boolean values, but we // define these macros in terms of true and false, respectively, which are // defined by C99 in stdbool.h. diff --git a/kernels/haswell/3/sup/d6x8/CMakeLists.txt b/kernels/haswell/3/sup/d6x8/CMakeLists.txt index ff9a061a6..3730d871f 100644 --- a/kernels/haswell/3/sup/d6x8/CMakeLists.txt +++ b/kernels/haswell/3/sup/d6x8/CMakeLists.txt @@ -2,15 +2,17 @@ target_sources("${PROJECT_NAME}" PRIVATE -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_r_haswell_ref_dMx1.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx1.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx2.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx4.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx8.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx2.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx4.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx6.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx8.c - ) + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_r_haswell_ref_dMx1.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx1.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx2.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx4.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx8.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx2.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx4.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx6.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx8.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_packm_haswell_asm_d6xk.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_packm_haswell_asm_d8xk.c + ) diff --git a/kernels/zen/3/CMakeLists.txt b/kernels/zen/3/CMakeLists.txt index bfe665c64..37d54bf69 100644 --- a/kernels/zen/3/CMakeLists.txt +++ b/kernels/zen/3/CMakeLists.txt @@ -5,6 +5,8 @@ target_sources("${PROJECT_NAME}" ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemm_small.c ${CMAKE_CURRENT_SOURCE_DIR}/bli_syrk_small.c ${CMAKE_CURRENT_SOURCE_DIR}/bli_trsm_small.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_dgemm_ref_k1.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemm_sqp.c ) add_subdirectory(sup)