CMake script changes and blis.h changes for amd-staging-milan-3.0

AMD Internal : [CPUPL-1083]

Change-Id: Ia29a1f328ee32e2aec59a7fc70c04400d6ee6580
This commit is contained in:
Kumar, Phani
2020-11-13 20:07:29 +05:30
committed by Phani Kumar
parent 0a3d94c9a2
commit 477fc41fff
13 changed files with 45918 additions and 31 deletions

View File

@@ -8,7 +8,7 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/bin")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/bin")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/bin")
if (MSVC)
set(TARGET_ARCH zen2)
set(TARGET_ARCH zen3)
message(STATUS "Setting MSVC flags2")
message(${TARGET_ARCH})
option(BUILD_SHARED_LIBS "Build shared library" ON)
@@ -130,15 +130,17 @@ include_directories(${CMAKE_SOURCE_DIR}/kernels)
include_directories(${CMAKE_SOURCE_DIR}/kernels/haswell)
include_directories(${CMAKE_SOURCE_DIR}/kernels/haswell/3)
include_directories(${CMAKE_SOURCE_DIR}/kernels/haswell/3/sup)
include_directories(${CMAKE_SOURCE_DIR}/kernels/haswell/3/sup/d6x8)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen/1)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen/1f)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen/1m)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen/2)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen/3)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen/3/sup)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen2)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen2/1f)
include_directories(${CMAKE_SOURCE_DIR}/kernels/zen2/2)
#include_directories(${CMAKE_SOURCE_DIR}/kernels/zen2)
#include_directories(${CMAKE_SOURCE_DIR}/kernels/zen2/1f)
#include_directories(${CMAKE_SOURCE_DIR}/kernels/zen2/2)
file(GLOB headers ${CMAKE_SOURCE_DIR}/*.h)

View File

@@ -1,6 +1,9 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc ##
if(${TARGET_ARCH} STREQUAL zen2)
if(${TARGET_ARCH} STREQUAL zen3)
message("The configuration is : ${TARGET_ARCH}")
add_subdirectory(zen3)
elseif(${TARGET_ARCH} STREQUAL zen2)
message("The configuration is : ${TARGET_ARCH}")
add_subdirectory(zen2)
elseif(${TARGET_ARCH} STREQUAL zen)

View File

@@ -0,0 +1,7 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc ##
# Attach the two zen3 configuration files found in the directory currently
# being processed to the ${PROJECT_NAME} target. They are PRIVATE, so they
# are used to build that target and are not propagated to its consumers.
target_sources("${PROJECT_NAME}"
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_cntx_init_zen3.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_family_zen3.h
)

View File

@@ -79,7 +79,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}/bla_omatadd.h
#Add all subdirectories
# add_subdirectory(attic)
add_subdirectory(blis)
# add_subdirectory(blis)
add_subdirectory(cblas)
add_subdirectory(check)
add_subdirectory(f2c)

View File

@@ -166,4 +166,6 @@ ${CMAKE_CURRENT_SOURCE_DIR}/cblas_caxpby.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_zaxpby.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_cgemm3m.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_zgemm3m.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_dcabs1.c
${CMAKE_CURRENT_SOURCE_DIR}/cblas_scabs1.c
)

View File

@@ -14,6 +14,193 @@
#ifndef CBLAS_F77_H
#define CBLAS_F77_H
#ifdef _WIN32
/*
* Level 1 BLAS
*/
#define F77_xerbla xerbla
#define F77_srotg srotg
#define F77_srotmg srotmg
#define F77_srot srot
#define F77_srotm srotm
#define F77_drotg drotg
#define F77_drotmg drotmg
#define F77_drot drot
#define F77_drotm drotm
#define F77_sswap sswap
#define F77_scopy scopy
#define F77_saxpy saxpy
#define F77_isamax_sub isamaxsub
#define F77_dswap dswap
#define F77_dcopy dcopy
#define F77_daxpy daxpy
#define F77_idamax_sub idamaxsub
#define F77_cswap cswap
#define F77_ccopy ccopy
#define F77_caxpy caxpy
#define F77_icamax_sub icamaxsub
#define F77_zswap zswap
#define F77_zcopy zcopy
#define F77_zaxpy zaxpy
#define F77_zaxpby zaxpby
#define F77_izamax_sub izamaxsub
#define F77_sdot_sub sdotsub
#define F77_ddot_sub ddotsub
#define F77_dsdot_sub dsdotsub
#define F77_sscal sscal
#define F77_dscal dscal
#define F77_cscal cscal
#define F77_zscal zscal
#define F77_csscal csscal
#define F77_zdscal zdscal
#define F77_cdotu_sub cdotusub
#define F77_cdotc_sub cdotcsub
#define F77_zdotu_sub zdotusub
#define F77_zdotc_sub zdotcsub
#define F77_snrm2_sub snrm2sub
#define F77_sasum_sub sasumsub
#define F77_dnrm2_sub dnrm2sub
#define F77_dasum_sub dasumsub
#define F77_scnrm2_sub scnrm2sub
#define F77_scasum_sub scasumsub
#define F77_dznrm2_sub dznrm2sub
#define F77_dzasum_sub dzasumsub
#define F77_sdsdot_sub sdsdotsub
/*
* Level 2 BLAS
*/
#define F77_ssymv ssymv
#define F77_ssbmv ssbmv
#define F77_sspmv sspmv
#define F77_sger sger
#define F77_ssyr ssyr
#define F77_sspr sspr
#define F77_ssyr2 ssyr2
#define F77_sspr2 sspr2
#define F77_dsymv dsymv
#define F77_dsbmv dsbmv
#define F77_dspmv dspmv
#define F77_dger dger
#define F77_dsyr dsyr
#define F77_dspr dspr
#define F77_dsyr2 dsyr2
#define F77_dspr2 dspr2
#define F77_chemv chemv
#define F77_chbmv chbmv
#define F77_chpmv chpmv
#define F77_cgeru cgeru
#define F77_cgerc cgerc
#define F77_cher cher
#define F77_chpr chpr
#define F77_cher2 cher2
#define F77_chpr2 chpr2
#define F77_zhemv zhemv
#define F77_zhbmv zhbmv
#define F77_zhpmv zhpmv
#define F77_zgeru zgeru
#define F77_zgerc zgerc
#define F77_zher zher
#define F77_zhpr zhpr
#define F77_zher2 zher2
#define F77_zhpr2 zhpr2
#define F77_sgemv sgemv
#define F77_sgbmv sgbmv
#define F77_strmv strmv
#define F77_stbmv stbmv
#define F77_stpmv stpmv
#define F77_strsv strsv
#define F77_stbsv stbsv
#define F77_stpsv stpsv
#define F77_dgemv dgemv
#define F77_dgbmv dgbmv
#define F77_dtrmv dtrmv
#define F77_dtbmv dtbmv
#define F77_dtpmv dtpmv
#define F77_dtrsv dtrsv
#define F77_dtbsv dtbsv
#define F77_dtpsv dtpsv
#define F77_cgemv cgemv
#define F77_cgbmv cgbmv
#define F77_ctrmv ctrmv
#define F77_ctbmv ctbmv
#define F77_ctpmv ctpmv
#define F77_ctrsv ctrsv
#define F77_ctbsv ctbsv
#define F77_ctpsv ctpsv
#define F77_zgemv zgemv
#define F77_zgbmv zgbmv
#define F77_ztrmv ztrmv
#define F77_ztbmv ztbmv
#define F77_ztpmv ztpmv
#define F77_ztrsv ztrsv
#define F77_ztbsv ztbsv
#define F77_ztpsv ztpsv
/*
* Level 3 BLAS
*/
#define F77_chemm chemm
#define F77_cherk cherk
#define F77_cher2k cher2k
#define F77_zhemm zhemm
#define F77_zherk zherk
#define F77_zher2k zher2k
#define F77_sgemm sgemm
#define F77_ssymm ssymm
#define F77_ssyrk ssyrk
#define F77_ssyr2k ssyr2k
#define F77_strmm strmm
#define F77_strsm strsm
#define F77_dgemm dgemm
#define F77_dsymm dsymm
#define F77_dsyrk dsyrk
#define F77_dsyr2k dsyr2k
#define F77_dtrmm dtrmm
#define F77_dtrsm dtrsm
#define F77_cgemm cgemm
#define F77_csymm csymm
#define F77_csyrk csyrk
#define F77_csyr2k csyr2k
#define F77_ctrmm ctrmm
#define F77_ctrsm ctrsm
#define F77_zgemm zgemm
#define F77_zsymm zsymm
#define F77_zsyrk zsyrk
#define F77_zsyr2k zsyr2k
#define F77_ztrmm ztrmm
#define F77_ztrsm ztrsm
#define F77_dgemmt dgemmt
#define F77_sgemmt sgemmt
#define F77_cgemmt cgemmt
#define F77_zgemmt zgemmt
/*
* Aux Function
*/
#define F77_scabs1 scabs1
#define F77_dcabs1 dcabs1
/*
* -- BLAS Extension APIs --
*/
#define F77_saxpby saxpby
#define F77_daxpby daxpby
#define F77_caxpby caxpby
#define F77_zaxpby zaxpby
#define F77_cgemm3m cgemm3m
#define F77_zgemm3m zgemm3m
#define F77_isamin_sub isaminsub
#define F77_idamin_sub idaminsub
#define F77_icamin_sub icaminsub
#define F77_izamin_sub izaminsub
// -- Batch APIs --
#define F77_sgemm_batch sgemm_batch
#define F77_dgemm_batch dgemm_batch
#define F77_cgemm_batch cgemm_batch
#define F77_zgemm_batch zgemm_batch
#else
/*
* Level 1 BLAS
*/
@@ -199,6 +386,6 @@
#define F77_dgemm_batch dgemm_batch_
#define F77_cgemm_batch cgemm_batch_
#define F77_zgemm_batch zgemm_batch_
#endif
#endif /* CBLAS_F77_H */

View File

@@ -1,7 +1,7 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
if(${TARGET_ARCH} STREQUAL zen2)
if(${TARGET_ARCH} STREQUAL zen3)
add_subdirectory(haswell)
add_subdirectory(zen)
add_subdirectory(zen2)
#add_subdirectory(zen2)
endif()

View File

@@ -2,11 +2,10 @@
target_sources("${PROJECT_NAME}"
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_d6x8.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_d6x8m.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_d6x8n.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_d6x8.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_d6x8m.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_d6x8n.c
)
add_subdirectory(d6x8)

View File

@@ -0,0 +1,16 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
# Attach every dMx<N> gemmsup source in the directory currently being
# processed to the ${PROJECT_NAME} target: one "r ... ref" file, four
# "rd ... asm" files and four "rv ... asm" files. They are PRIVATE, so they
# are compiled into that target only and not propagated to its consumers.
target_sources("${PROJECT_NAME}"
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_r_haswell_ref_dMx1.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx1.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx2.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx4.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rd_haswell_asm_dMx8.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx2.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx4.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx6.c
        ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemmsup_rv_haswell_asm_dMx8.c
)

View File

@@ -5,7 +5,7 @@ target_sources("${PROJECT_NAME}"
${CMAKE_CURRENT_SOURCE_DIR}/bli_kernels_zen.h
)
set(SUBDIRECTORIES "1" "1f" "1m" "3")
set(SUBDIRECTORIES "1" "1f" "1m" "2" "3")
#Add all subdirectories
foreach(VAR ${SUBDIRECTORIES})

View File

@@ -1,13 +1,25 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
# Test executables. Each one is built from a single C source file and is
# linked against "${PROJECT_NAME}.lib". The `debug` and `optimized` keywords
# scope the library that follows them to the matching build configuration;
# here both configurations name the same library file.
# NOTE(review): the hard-coded ".lib" suffix looks MSVC-specific, and linking
# by file name does not by itself create a build-order dependency on the
# library target. Confirm whether linking the ${PROJECT_NAME} target
# directly would be acceptable.
add_executable(TestAminv test_aminv.c)
target_link_libraries(TestAminv debug "${PROJECT_NAME}.lib")
target_link_libraries(TestAminv optimized "${PROJECT_NAME}.lib")
add_executable(TestAxpyv test_axpyv.c)
target_link_libraries(TestAxpyv debug "${PROJECT_NAME}.lib")
target_link_libraries(TestAxpyv optimized "${PROJECT_NAME}.lib")
add_executable(TestAxpbyv test_axpbyv.c)
target_link_libraries(TestAxpbyv debug "${PROJECT_NAME}.lib")
target_link_libraries(TestAxpbyv optimized "${PROJECT_NAME}.lib")
add_executable(TestCopyv test_copyv.c)
target_link_libraries(TestCopyv debug "${PROJECT_NAME}.lib")
target_link_libraries(TestCopyv optimized "${PROJECT_NAME}.lib")
add_executable(TestCabs1 test_cabs1.c)
target_link_libraries(TestCabs1 debug "${PROJECT_NAME}.lib")
target_link_libraries(TestCabs1 optimized "${PROJECT_NAME}.lib")
add_executable(TestDotv test_dotv.c)
target_link_libraries(TestDotv debug "${PROJECT_NAME}.lib")
target_link_libraries(TestDotv optimized "${PROJECT_NAME}.lib")
@@ -16,6 +28,10 @@ add_executable(TestGemm test_gemm.c)
target_link_libraries(TestGemm debug "${PROJECT_NAME}.lib")
target_link_libraries(TestGemm optimized "${PROJECT_NAME}.lib")
# TestGemmBatch and TestGemm3m are each built from one C source file and
# linked against "${PROJECT_NAME}.lib" for both the debug and the optimized
# build configurations.
add_executable(TestGemmBatch test_gemm_batch.c)
target_link_libraries(TestGemmBatch debug "${PROJECT_NAME}.lib")
target_link_libraries(TestGemmBatch optimized "${PROJECT_NAME}.lib")
add_executable(TestGemm3m test_gemm3m.c)
target_link_libraries(TestGemm3m debug "${PROJECT_NAME}.lib")
target_link_libraries(TestGemm3m optimized "${PROJECT_NAME}.lib")

View File

@@ -2393,10 +2393,10 @@ typedef enum
#define PASTEMACT(ch1, ch2, ch3, ch4) bli_ ## ch1 ## ch2 ## _ ## ch3 ## _ ## ch4
// Fortran-77 name-mangling macros.
#define PASTEF770(name) name ## _
#define PASTEF77(ch1,name) ch1 ## name ## _
#define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name ## _
#define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name ## _
#define PASTEF770(name) name
#define PASTEF77(ch1,name) ch1 ## name
#define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name
#define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name
// -- Include other groups of macros
@@ -41459,20 +41459,20 @@ INSERT_GENTPROT_BLAS( gemmt )
//
// Prototype Fortran-compatible BLIS interfaces.
//
BLIS_EXPORT_BLAS void PASTEF770(bli_thread_set_ways)
(
const f77_int* jc,
const f77_int* pc,
const f77_int* ic,
const f77_int* jr,
const f77_int* ir
);
BLIS_EXPORT_BLAS void PASTEF770(bli_thread_set_num_threads)
(
const f77_int* nt
);
//
//BLIS_EXPORT_BLAS void PASTEF770(bli_thread_set_ways)
// (
// const f77_int* jc,
// const f77_int* pc,
// const f77_int* ic,
// const f77_int* jr,
// const f77_int* ir
// );
//
//BLIS_EXPORT_BLAS void PASTEF770(bli_thread_set_num_threads)
// (
// const f77_int* nt
// );
// end b77_thread.h
@@ -44316,7 +44316,20 @@ void BLIS_EXPORT_BLAS cblas_xerbla(f77_int p, const char *rout, const char *form
BLIS_EXPORT_BLIS void bli_sleep( unsigned int secs );
// end bli_winsys.h
// Alias the trailing-underscore spellings of selected BLAS routine names to
// their un-suffixed spellings: after these definitions the preprocessor
// rewrites every later use of e.g. `sswap_` to `sswap`.
// NOTE(review): only the routines listed here are aliased; confirm whether
// other underscore-suffixed names are expected to resolve as well.
#define sswap_ sswap
#define dswap_ dswap
#define sscal_ sscal
#define dscal_ dscal
#define dgemv_ dgemv
#define sgemv_ sgemv
#define sdot_ sdot
#define ddot_ ddot
#define scopy_ scopy
#define dcopy_ dcopy
#define saxpy_ saxpy
#define daxpy_ daxpy
#define isamax_ isamax
#define idamax_ idamax
// begin aocldtl.h

45642
windows/zen3/blis.h Normal file

File diff suppressed because it is too large Load Diff