From d78defa0fc02a752275ef611c20b6c8e89a23faa Mon Sep 17 00:00:00 2001 From: nphaniku Date: Mon, 8 Mar 2021 22:31:19 +0530 Subject: [PATCH] AOCL Windows: 3.1 BLIS changes 1. CMake script changes for adding new files to the build. 2. Added uppercase support for a couple of APIs. 3. bool is not supported in clang, so defined it. AMD Internal : [CPUPL-1422] Change-Id: I4cac8fb8ef86cd6bacfd29e3b1a84c5da1310f61 --- CMakeLists.txt | 4 +- build/bli_win_config.h.in | 2 - frame/include/bli_macro_defs.h | 330 ++++++++++++++++++++++ frame/include/bli_system.h | 1 + frame/include/bli_type_defs.h | 6 +- kernels/haswell/3/sup/d6x8/CMakeLists.txt | 22 +- kernels/zen/3/CMakeLists.txt | 2 + 7 files changed, 351 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de5b828a2..452452d1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.## +##Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.## cmake_minimum_required(VERSION 3.0.0) @@ -88,7 +88,7 @@ option(BLIS_ENABLE_ILP64 "ENABLE BLIS ILP64" OFF) option(ENABLE_INT_TYPE_SIZE "Integer size type" OFF) option(ENABLE_BLASTEST "Enable the blastest" OFF) option(ENABLE_TESTCPP_TESTING "Enabling testcpp" OFF) -option (ENABLE_NO_UNDERSCORE_API "export APIs without underscore" OFF) +option (ENABLE_NO_UNDERSCORE_API "export APIs without underscore" ON) option (ENABLE_UPPERCASE "export APIs with uppercase" OFF) if(ENABLE_NO_UNDERSCORE_API) diff --git a/build/bli_win_config.h.in b/build/bli_win_config.h.in index 1aaa182c6..0f6c63a74 100644 --- a/build/bli_win_config.h.in +++ b/build/bli_win_config.h.in @@ -9,8 +9,6 @@ #cmakedefine BLIS_ENABLE_OPENMP -#cmakedefine BLIS_ENABLE_PTHREADS - #cmakedefine BLIS_ENABLE_JRIR_SLAB #cmakedefine BLIS_ENABLE_JRIR_RR diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index 77bc14a37..b08a76520 100644 --- a/frame/include/bli_macro_defs.h +++ 
b/frame/include/bli_macro_defs.h @@ -157,10 +157,17 @@ #define PASTEMACT(ch1, ch2, ch3, ch4) bli_ ## ch1 ## ch2 ## _ ## ch3 ## _ ## ch4 // Fortran-77 name-mangling macros. +#ifdef BLIS_ENABLE_NO_UNDERSCORE_API +#define PASTEF770(name) name +#define PASTEF77(ch1,name) ch1 ## name +#define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name +#define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name +#else #define PASTEF770(name) name ## _ #define PASTEF77(ch1,name) ch1 ## name ## _ #define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name ## _ #define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name ## _ +#endif // -- Include other groups of macros @@ -181,5 +188,328 @@ #include "bli_oapi_macro_defs.h" #include "bli_tapi_macro_defs.h" +#ifdef BLIS_ENABLE_NO_UNDERSCORE_API +#define isamax_ isamax +#define idamax_ idamax +#define icamax_ icamax +#define izamax_ izamax +#define sasum_ sasum +#define dasum_ dasum +#define scasum_ scasum +#define dzasum_ dzasum +#define saxpy_ saxpy +#define daxpy_ daxpy +#define caxpy_ caxpy +#define zaxpy_ zaxpy +#define scopy_ scopy +#define dcopy_ dcopy +#define ccopy_ ccopy +#define zcopy_ zcopy +#define sdot_ sdot +#define ddot_ ddot +#define snrm2_ snrm2 +#define dnrm2_ dnrm2 +#define scnrm2_ scnrm2 +#define dznrm2_ dznrm2 +#define sscal_ sscal +#define dscal_ dscal +#define cscal_ cscal +#define csscal_ csscal +#define zscal_ zscal +#define zdscal_ zdscal +#define sswap_ sswap +#define dswap_ dswap +#define cswap_ cswap +#define zswap_ zswap +#define sgemv_ sgemv +#define dgemv_ dgemv +#define cgemv_ cgemv +#define zgemv_ zgemv +#define sger_ sger +#define dger_ dger +#define cgerc_ cgerc +#define cgeru_ cgeru +#define zgerc_ zgerc +#define zgeru_ zgeru +#define chemv_ chemv +#define zhemv_ zhemv +#define cher_ cher +#define zher_ zher +#define cher2_ cher2 +#define zher2_ zher2 +#define ssymv_ ssymv +#define dsymv_ dsymv +#define csymm_ csymm +#define zsymm_ zsymm +#define ssyr_ ssyr +#define dsyr_ dsyr +#define csyrk_ csyrk +#define 
csyrk_ csyrk +#define zsyrk_ zsyrk +#define ssyr2_ ssyr2 +#define dsyr2_ dsyr2 +#define csyr2k_ csyr2k +#define zsyr2k_ zsyr2k +#define strmv_ strmv +#define dtrmv_ dtrmv +#define ctrmv_ ctrmv +#define ztrmv_ ztrmv +#define strsv_ strsv +#define dtrsv_ dtrsv +#define ctrsv_ ctrsv +#define ztrsv_ ztrsv +#define sgemm_ sgemm +#define dgemm_ dgemm +#define cgemm_ cgemm +#define zgemm_ zgemm +#define chemm_ chemm +#define zhemm_ zhemm +#define cher2k_ cher2k +#define zher2k_ zher2k +#define cherk_ cherk +#define zherk_ zherk +#define ssymm_ ssymm +#define dsymm_ dsymm +#define ssyr2k_ ssyr2k +#define dsyr2k_ dsyr2k +#define ssyrk_ ssyrk +#define dsyrk_ dsyrk +#define strmm_ strmm +#define dtrmm_ dtrmm +#define ctrmm_ ctrmm +#define ztrmm_ ztrmm +#define strsm_ strsm +#define dtrsm_ dtrsm +#define ctrsm_ ctrsm +#define ztrsm_ ztrsm +#define lsame_ lsame +#define cimatcopy_ cimatcopy +#define comatadd_ comatadd +#define comatcopy2_ comatcopy2 +#define comatcopy_ comatcopy +#define dimatcopy_ dimatcopy +#define domatadd_ domatadd +#define domatcopy2_ domatcopy2 +#define domatcopy_ domatcopy +#define simatcopy_ simatcopy +#define somatadd_ somatadd +#define somatcopy2_ somatcopy2 +#define somatcopy_ somatcopy +#define zimatcopy_ zimatcopy +#define zomatadd_ zomatadd +#define zomatcopy2_ zomatcopy2 +#define zomatcopy_ zomatcopy +#endif + +#ifdef BLIS_ENABLE_UPPERCASE +#define caxpby CAXPBY +#define caxpy CAXPY +#define ccopy CCOPY +#define cdotc CDOTC +#define cdotcsub CDOTCSUB +#define cdotu CDOTU +#define cdotusub CDOTUSUB +#define cgbmv CGBMV +#define cgemm CGEMM +#define cgemm3m CGEMM3M +#define cgemm_batch CGEMM_BATCH +#define cgemmt CGEMMT +#define cgemv CGEMV +#define cgerc CGERC +#define cgeru CGERU +#define chbmv CHBMV +#define chemm CHEMM +#define chemv CHEMV +#define cher CHER +#define cher2 CHER2 +#define cher2k CHER2K +#define cherk CHERK +#define chpmv CHPMV +#define chpr CHPR +#define chpr2 CHPR2 +#define cimatcopy CIMATCOPY +#define comatadd COMATADD 
+#define comatcopy2 COMATCOPY2 +#define comatcopy COMATCOPY +#define crotg CROTG +#define cscal CSCAL +#define csrot CSROT +#define csscal CSSCAL +#define cswap CSWAP +#define csymm CSYMM +#define csyr2k CSYR2K +#define csyrk CSYRK +#define ctbmv CTBMV +#define ctbsv CTBSV +#define ctpmv CTPMV +#define ctpsv CTPSV +#define ctrmm CTRMM +#define ctrmv CTRMV +#define ctrsm CTRSM +#define ctrsv CTRSV +#define dasum DASUM +#define dasumsub DASUMSUB +#define daxpby DAXPBY +#define daxpy DAXPY +#define dcabs1 DCABS1 +#define dcopy DCOPY +#define ddot DDOT +#define ddotsub DDOTSUB +#define dgbmv DGBMV +#define dgemm DGEMM +#define dgemm_batch DGEMM_BATCH +#define dgemmt DGEMMT +#define dgemv DGEMV +#define dger DGER +#define dnrm2 DNRM2 +#define dnrm2sub DNRM2SUB +#define dimatcopy DIMATCOPY +#define domatadd DOMATADD +#define domatcopy2 DOMATCOPY2 +#define domatcopy DOMATCOPY +#define drot DROT +#define drotg DROTG +#define drotm DROTM +#define drotmg DROTMG +#define dsbmv DSBMV +#define dscal DSCAL +#define dsdot DSDOT +#define dsdotsub DSDOTSUB +#define dspmv DSPMV +#define dspr DSPR +#define dspr2 DSPR2 +#define dswap DSWAP +#define dsymm DSYMM +#define dsymv DSYMV +#define dsyr DSYR +#define dsyr2 DSYR2 +#define dsyr2k DSYR2K +#define dsyrk DSYRK +#define dtbmv DTBMV +#define dtbsv DTBSV +#define dtpmv DTPMV +#define dtpsv DTPSV +#define dtrmm DTRMM +#define dtrmv DTRMV +#define dtrsm DTRSM +#define dtrsv DTRSV +#define dzasum DZASUM +#define dzasumsub DZASUMSUB +#define dznrm2 DZNRM2 +#define dznrm2sub DZNRM2SUB +#define icamax ICAMAX +#define icamaxsub ICAMAXSUB +#define icamin ICAMIN +#define icaminsub ICAMINSUB +#define idamax IDAMAX +#define idamaxsub IDAMAXSUB +#define idamin IDAMIN +#define idaminsub IDAMINSUB +#define isamax ISAMAX +#define isamaxsub ISAMAXSUB +#define isamin ISAMIN +#define isaminsub ISAMINSUB +#define izamax IZAMAX +#define izamaxsub IZAMAXSUB +#define izamin IZAMIN +#define izaminsub IZAMINSUB +#define lsame LSAME +#define sasum SASUM 
+#define sasumsub SASUMSUB +#define saxpby SAXPBY +#define saxpy SAXPY +#define scabs1 SCABS1 +#define scasum SCASUM +#define scasumsub SCASUMSUB +#define scnrm2 SCNRM2 +#define scnrm2sub SCNRM2SUB +#define scopy SCOPY +#define sdot SDOT +#define sdotsub SDOTSUB +#define sdsdot SDSDOT +#define sdsdotsub SDSDOTSUB +#define sgbmv SGBMV +#define sgemm SGEMM +#define sgemm_batch SGEMM_BATCH +#define sgemmt SGEMMT +#define sgemv SGEMV +#define sger SGER +#define snrm2 SNRM2 +#define snrm2sub SNRM2SUB +#define simatcopy SIMATCOPY +#define somatadd SOMATADD +#define somatcopy2 SOMATCOPY2 +#define somatcopy SOMATCOPY +#define srot SROT +#define srotg SROTG +#define srotm SROTM +#define srotmg SROTMG +#define ssbmv SSBMV +#define sscal SSCAL +#define sspmv SSPMV +#define sspr SSPR +#define sspr2 SSPR2 +#define sswap SSWAP +#define ssymm SSYMM +#define ssymv SSYMV +#define ssyr SSYR +#define ssyr2 SSYR2 +#define ssyr2k SSYR2K +#define ssyrk SSYRK +#define stbmv STBMV +#define stbsv STBSV +#define stpmv STPMV +#define stpsv STPSV +#define strmm STRMM +#define strmv STRMV +#define strsm STRSM +#define strsv STRSV +#define xerbla XERBLA +#define zaxpby ZAXPBY +#define zaxpy ZAXPY +#define zcopy ZCOPY +#define zdotc ZDOTC +#define zdotcsub ZDOTCSUB +#define zdotu ZDOTU +#define zdotusub ZDOTUSUB +#define zdrot ZDROT +#define zdscal ZDSCAL +#define zgbmv ZGBMV +#define zgemm ZGEMM +#define zgemm3m ZGEMM3M +#define zgemm_batch ZGEMM_BATCH +#define zgemmt ZGEMMT +#define zgemv ZGEMV +#define zgerc ZGERC +#define zgeru ZGERU +#define zhbmv ZHBMV +#define zhemm ZHEMM +#define zhemv ZHEMV +#define zher ZHER +#define zher2 ZHER2 +#define zher2k ZHER2K +#define zherk ZHERK +#define zhpmv ZHPMV +#define zhpr ZHPR +#define zhpr2 ZHPR2 +#define zimatcopy ZIMATCOPY +#define zomatadd ZOMATADD +#define zomatcopy2 ZOMATCOPY2 +#define zomatcopy ZOMATCOPY +#define zrotg ZROTG +#define zscal ZSCAL +#define zswap ZSWAP +#define zsymm ZSYMM +#define zsyr2k ZSYR2K +#define zsyrk ZSYRK +#define ztbmv 
ZTBMV +#define ztbsv ZTBSV +#define ztpmv ZTPMV +#define ztpsv ZTPSV +#define ztrmm ZTRMM +#define ztrmv ZTRMV +#define ztrsm ZTRSM +#define ztrsv ZTRSV +#endif #endif diff --git a/frame/include/bli_system.h b/frame/include/bli_system.h index 96304f7e0..b614b287d 100644 --- a/frame/include/bli_system.h +++ b/frame/include/bli_system.h @@ -71,6 +71,7 @@ // Determine the target operating system. #if defined(_WIN32) || defined(__CYGWIN__) + #define BLIS #define BLIS_OS_WINDOWS 1 #elif defined(__gnu_hurd__) #define BLIS_OS_GNU 1 diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 33ece6e9b..e6c94b83f 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -89,8 +89,10 @@ typedef unsigned long int guint_t; // -- Boolean type -- // NOTE: bool_t is no longer used and has been replaced with C99's bool type. -//typedef bool bool_t; - +#ifdef _WIN32 +#undef bool +typedef gint_t bool; +#endif // BLIS uses TRUE and FALSE macro constants as possible boolean values, but we // define these macros in terms of true and false, respectively, which are // defined by C99 in stdbool.h. 
diff --git a/kernels/haswell/3/sup/d6x8/CMakeLists.txt b/kernels/haswell/3/sup/d6x8/CMakeLists.txt index ff9a061a6..3730d871f 100644 --- a/kernels/haswell/3/sup/d6x8/CMakeLists.txt +++ b/kernels/haswell/3/sup/d6x8/CMakeLists.txt @@ -2,15 +2,17 @@ target_sources("${PROJECT_NAME}" PRIVATE -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_r_haswell_ref_dMx1.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx1.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx2.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx4.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx8.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx2.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx4.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx6.c -${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx8.c - ) + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_r_haswell_ref_dMx1.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx1.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx2.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx4.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx8.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx2.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx4.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx6.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx8.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_packm_haswell_asm_d6xk.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_packm_haswell_asm_d8xk.c + ) diff --git a/kernels/zen/3/CMakeLists.txt b/kernels/zen/3/CMakeLists.txt index bfe665c64..37d54bf69 100644 --- a/kernels/zen/3/CMakeLists.txt +++ b/kernels/zen/3/CMakeLists.txt @@ -5,6 +5,8 @@ target_sources("${PROJECT_NAME}" ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemm_small.c ${CMAKE_CURRENT_SOURCE_DIR}/bli_syrk_small.c ${CMAKE_CURRENT_SOURCE_DIR}/bli_trsm_small.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_dgemm_ref_k1.c + 
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemm_sqp.c ) add_subdirectory(sup)