From 47744663d94cec0035447f1eebc622415166010b Mon Sep 17 00:00:00 2001 From: Meghana Vankadari Date: Mon, 5 Oct 2020 17:42:45 +0530 Subject: [PATCH] Enabling framework optimizations for zen family architectures. Details: - Introduced a new macro 'BLIS_CONFIG_EPYC' to enable blas and cblas framework optimizations for zen family configurations. - The macro needs to be defined in family.h files of respective arch configs. - Moved zen2-specific optimized kernels to zen folder, in order to be accessible to all zen family architectures. Change-Id: I8da2db6b7ab22ef350a01d86c214006e812eb06d --- config/zen/bli_family_zen.h | 8 +++- config/zen2/bli_family_zen2.h | 8 +++- config/zen3/bli_family_zen3.h | 7 ++- frame/2/gemv/bli_gemv_unf_var1.c | 4 +- frame/2/gemv/bli_gemv_unf_var2.c | 4 +- frame/compat/bla_amax.c | 4 +- frame/compat/bla_axpy.c | 6 +-- frame/compat/bla_copy.c | 4 +- frame/compat/bla_dot.c | 4 +- frame/compat/bla_gemv.c | 4 +- frame/compat/bla_scal.c | 4 +- frame/compat/bla_swap.c | 4 +- frame/compat/cblas/src/cblas_daxpy.c | 4 +- frame/compat/cblas/src/cblas_dcopy.c | 4 +- frame/compat/cblas/src/cblas_ddot.c | 6 +-- frame/compat/cblas/src/cblas_dscal.c | 4 +- frame/compat/cblas/src/cblas_dswap.c | 4 +- frame/compat/cblas/src/cblas_idamax.c | 4 +- frame/compat/cblas/src/cblas_isamax.c | 4 +- frame/compat/cblas/src/cblas_saxpy.c | 4 +- frame/compat/cblas/src/cblas_scopy.c | 4 +- frame/compat/cblas/src/cblas_sdot.c | 4 +- frame/compat/cblas/src/cblas_sscal.c | 4 +- frame/compat/cblas/src/cblas_sswap.c | 4 +- frame/include/bli_arch_config.h | 5 +- kernels/zen/1/bli_scalv_zen_int10.c | 6 +-- kernels/zen/1f/CMakeLists.txt | 3 +- .../{zen2 => zen}/1f/bli_axpyf_zen_int_5.c | 6 +-- kernels/zen/2/CMakeLists.txt | 9 ++++ kernels/{zen2 => zen}/2/bli_gemv_zen_ref.c | 2 +- kernels/zen/bli_kernels_zen.h | 8 ++++ kernels/zen2/1f/CMakeLists.txt | 9 ---- kernels/zen2/2/CMakeLists.txt | 9 ---- kernels/zen2/CMakeLists.txt | 11 ----- kernels/zen2/bli_kernels_zen2.h | 46 
------------------- 35 files changed, 91 insertions(+), 134 deletions(-) rename kernels/{zen2 => zen}/1f/bli_axpyf_zen_int_5.c (99%) create mode 100644 kernels/zen/2/CMakeLists.txt rename kernels/{zen2 => zen}/2/bli_gemv_zen_ref.c (98%) delete mode 100644 kernels/zen2/1f/CMakeLists.txt delete mode 100644 kernels/zen2/2/CMakeLists.txt delete mode 100644 kernels/zen2/CMakeLists.txt delete mode 100644 kernels/zen2/bli_kernels_zen2.h diff --git a/config/zen/bli_family_zen.h b/config/zen/bli_family_zen.h index 526e3a8b0..2c31a67ea 100644 --- a/config/zen/bli_family_zen.h +++ b/config/zen/bli_family_zen.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + Copyright (C) 2018 - 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -36,6 +36,12 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +//To enable framework optimizations for EPYC family processors. +//With this macro defined, we can call kernels directly from +//BLAS interfaces for levels 1 & 2. +//This macro needs to be defined for all EPYC configurations. +#define BLIS_CONFIG_EPYC + // By default, it is effective to parallelize the outer loops. // Setting these macros to 1 will force JR and IR inner loops // to be not paralleized. diff --git a/config/zen2/bli_family_zen2.h b/config/zen2/bli_family_zen2.h index 45088cfce..f81a2984c 100644 --- a/config/zen2/bli_family_zen2.h +++ b/config/zen2/bli_family_zen2.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019, Advanced Micro Devices, Inc + Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -36,6 +36,12 @@ #ifndef BLI_FAMILY_ZEN2_ #define BLI_FAMILY_ZEN2_ +//To enable framework optimizations for EPYC family processors. +//With this macro defined, we can call kernels directly from BLAS interfaces +//for levels 1 & 2. +//This macro needs to be defined for all EPYC configurations. +#define BLIS_CONFIG_EPYC + // By default, it is effective to parallelize the outer loops. // Setting these macros to 1 will force JR and IR inner loops // to be not paralleized. diff --git a/config/zen3/bli_family_zen3.h b/config/zen3/bli_family_zen3.h index 77cd17b75..d2dd58be3 100644 --- a/config/zen3/bli_family_zen3.h +++ b/config/zen3/bli_family_zen3.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,6 +44,11 @@ #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 +//To enable framework optimizations for EPYC family processors. +//With this macro defined, we can call kernels directly from BLAS interfaces +//for levels 1 & 2. +//This macro needs to be defined for all EPYC configurations. +#define BLIS_CONFIG_EPYC #define BLIS_ENABLE_SMALL_MATRIX #define BLIS_ENABLE_SMALL_MATRIX_TRSM diff --git a/frame/2/gemv/bli_gemv_unf_var1.c b/frame/2/gemv/bli_gemv_unf_var1.c index 9c1785251..677dd0c47 100644 --- a/frame/2/gemv/bli_gemv_unf_var1.c +++ b/frame/2/gemv/bli_gemv_unf_var1.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -102,7 +102,7 @@ void PASTEMAC(ch,varname) \ } \ } -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC void bli_dgemv_unf_var1 ( trans_t transa, diff --git a/frame/2/gemv/bli_gemv_unf_var2.c b/frame/2/gemv/bli_gemv_unf_var2.c index 795469af5..1332312a9 100644 --- a/frame/2/gemv/bli_gemv_unf_var2.c +++ b/frame/2/gemv/bli_gemv_unf_var2.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -129,7 +129,7 @@ void PASTEMAC(ch,varname) \ } \ } -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC void bli_dgemv_unf_var2 ( diff --git a/frame/compat/bla_amax.c b/frame/compat/bla_amax.c index cad33e1bf..82d1c70ff 100644 --- a/frame/compat/bla_amax.c +++ b/frame/compat/bla_amax.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -92,7 +92,7 @@ f77_int PASTEF772(i,chx,blasname) \ } #ifdef BLIS_ENABLE_BLAS -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC f77_int isamax_ ( diff --git a/frame/compat/bla_axpy.c b/frame/compat/bla_axpy.c index ad1689898..123341b2e 100644 --- a/frame/compat/bla_axpy.c +++ b/frame/compat/bla_axpy.c @@ -5,8 +5,8 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. - + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -85,7 +85,7 @@ void PASTEF77(ch,blasname) \ #ifdef BLIS_ENABLE_BLAS -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC void saxpy_ ( const f77_int* n, diff --git a/frame/compat/bla_copy.c b/frame/compat/bla_copy.c index d73796664..58fa48219 100644 --- a/frame/compat/bla_copy.c +++ b/frame/compat/bla_copy.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -82,7 +82,7 @@ void PASTEF77(ch,blasname) \ } #ifdef BLIS_ENABLE_BLAS -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC void scopy_ ( diff --git a/frame/compat/bla_dot.c b/frame/compat/bla_dot.c index 1fb420267..c825d3520 100644 --- a/frame/compat/bla_dot.c +++ b/frame/compat/bla_dot.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -134,7 +134,7 @@ dcomplex zdotc_ } #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC float sdot_ ( diff --git a/frame/compat/bla_gemv.c b/frame/compat/bla_gemv.c index c9578e396..130abb73c 100644 --- a/frame/compat/bla_gemv.c +++ b/frame/compat/bla_gemv.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -139,7 +139,7 @@ void PASTEF77(ch,blasname) \ #ifdef BLIS_ENABLE_BLAS -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC void dgemv_ ( const f77_char* transa, diff --git a/frame/compat/bla_scal.c b/frame/compat/bla_scal.c index 848d42094..73599e054 100644 --- a/frame/compat/bla_scal.c +++ b/frame/compat/bla_scal.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -86,7 +86,7 @@ void PASTEF772(chx,cha,blasname) \ } #ifdef BLIS_ENABLE_BLAS -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC void sscal_ ( diff --git a/frame/compat/bla_swap.c b/frame/compat/bla_swap.c index 8d8d40628..b24690f08 100644 --- a/frame/compat/bla_swap.c +++ b/frame/compat/bla_swap.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -81,7 +81,7 @@ void PASTEF77(ch,blasname) \ } #ifdef BLIS_ENABLE_BLAS -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC void sswap_ ( diff --git a/frame/compat/cblas/src/cblas_daxpy.c b/frame/compat/cblas/src/cblas_daxpy.c index fe1fa3914..eb4736767 100644 --- a/frame/compat/cblas/src/cblas_daxpy.c +++ b/frame/compat/cblas/src/cblas_daxpy.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
*/ #include "cblas.h" #include "cblas_f77.h" @@ -22,7 +22,7 @@ void cblas_daxpy( f77_int N, double alpha, const double *X, #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; double* x0; double* y0; diff --git a/frame/compat/cblas/src/cblas_dcopy.c b/frame/compat/cblas/src/cblas_dcopy.c index e1b354540..c0be6fc0f 100644 --- a/frame/compat/cblas/src/cblas_dcopy.c +++ b/frame/compat/cblas/src/cblas_dcopy.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. * */ @@ -23,7 +23,7 @@ void cblas_dcopy( f77_int N, const double *X, #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; double* x0; double* y0; diff --git a/frame/compat/cblas/src/cblas_ddot.c b/frame/compat/cblas/src/cblas_ddot.c index b7bde2156..1752b880f 100644 --- a/frame/compat/cblas/src/cblas_ddot.c +++ b/frame/compat/cblas/src/cblas_ddot.c @@ -7,8 +7,8 @@ * It calls the fortran wrapper before calling ddot. * * Written by Keita Teranishi. 2/11/1998 - * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -24,7 +24,7 @@ double cblas_ddot( f77_int N, const double *X, #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; double* x0; double* y0; diff --git a/frame/compat/cblas/src/cblas_dscal.c b/frame/compat/cblas/src/cblas_dscal.c index fc9e36a8b..e0e3b29b4 100644 --- a/frame/compat/cblas/src/cblas_dscal.c +++ b/frame/compat/cblas/src/cblas_dscal.c @@ -8,7 +8,7 @@ * Written by Keita Teranishi. 2/11/1998 * * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
*/ #include "cblas.h" #include "cblas_f77.h" @@ -22,7 +22,7 @@ void cblas_dscal( f77_int N, double alpha, double *X, #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; double* x0; inc_t incx0; diff --git a/frame/compat/cblas/src/cblas_dswap.c b/frame/compat/cblas/src/cblas_dswap.c index 9024e308a..5a5ccbf14 100644 --- a/frame/compat/cblas/src/cblas_dswap.c +++ b/frame/compat/cblas/src/cblas_dswap.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -23,7 +23,7 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y, #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; double* x0; double* y0; diff --git a/frame/compat/cblas/src/cblas_idamax.c b/frame/compat/cblas/src/cblas_idamax.c index 5924379f9..071482c36 100644 --- a/frame/compat/cblas/src/cblas_idamax.c +++ b/frame/compat/cblas/src/cblas_idamax.c @@ -7,7 +7,7 @@ * It calls the fortran wrapper before calling idamax. * * Written by Keita Teranishi. 2/11/1998 - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -22,7 +22,7 @@ f77_int cblas_idamax( f77_int N, const double *X, f77_int incX) #define F77_incX incX #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; double* x0; inc_t incx0; diff --git a/frame/compat/cblas/src/cblas_isamax.c b/frame/compat/cblas/src/cblas_isamax.c index ef622b717..81d13d099 100644 --- a/frame/compat/cblas/src/cblas_isamax.c +++ b/frame/compat/cblas/src/cblas_isamax.c @@ -7,7 +7,7 @@ * It calls the fortran wrapper before calling isamax. * * Written by Keita Teranishi. 2/11/1998 - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
* */ #include "cblas.h" @@ -22,7 +22,7 @@ f77_int cblas_isamax( f77_int N, const float *X, f77_int incX) #define F77_incX incX #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; float* x0; inc_t incx0; diff --git a/frame/compat/cblas/src/cblas_saxpy.c b/frame/compat/cblas/src/cblas_saxpy.c index c4aebbb2f..8c5ace43f 100644 --- a/frame/compat/cblas/src/cblas_saxpy.c +++ b/frame/compat/cblas/src/cblas_saxpy.c @@ -8,7 +8,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. */ #include "cblas.h" @@ -24,7 +24,7 @@ void cblas_saxpy( f77_int N, float alpha, const float *X, #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; float* x0; float* y0; diff --git a/frame/compat/cblas/src/cblas_scopy.c b/frame/compat/cblas/src/cblas_scopy.c index b8809d856..518d4f629 100644 --- a/frame/compat/cblas/src/cblas_scopy.c +++ b/frame/compat/cblas/src/cblas_scopy.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -22,7 +22,7 @@ void cblas_scopy( f77_int N, const float *X, #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; float* x0; diff --git a/frame/compat/cblas/src/cblas_sdot.c b/frame/compat/cblas/src/cblas_sdot.c index 602e7c957..d0f2b9b6b 100644 --- a/frame/compat/cblas/src/cblas_sdot.c +++ b/frame/compat/cblas/src/cblas_sdot.c @@ -8,7 +8,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
* */ #include "cblas.h" @@ -24,7 +24,7 @@ float cblas_sdot( f77_int N, const float *X, #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; float* x0; float* y0; diff --git a/frame/compat/cblas/src/cblas_sscal.c b/frame/compat/cblas/src/cblas_sscal.c index be6216807..6c4de4683 100644 --- a/frame/compat/cblas/src/cblas_sscal.c +++ b/frame/compat/cblas/src/cblas_sscal.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -22,7 +22,7 @@ void cblas_sscal( f77_int N, float alpha, float *X, #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; float* x0; diff --git a/frame/compat/cblas/src/cblas_sswap.c b/frame/compat/cblas/src/cblas_sswap.c index ea7aa7207..c09e154c0 100644 --- a/frame/compat/cblas/src/cblas_sswap.c +++ b/frame/compat/cblas/src/cblas_sswap.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. + * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -23,7 +23,7 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y, #define F77_incY incY #endif -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC dim_t n0; float* x0; diff --git a/frame/include/bli_arch_config.h b/frame/include/bli_arch_config.h index ddb45efd4..4155bc5e4 100644 --- a/frame/include/bli_arch_config.h +++ b/frame/include/bli_arch_config.h @@ -6,7 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP - Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc. + Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -250,9 +250,6 @@ CNTX_INIT_PROTS( generic ) // -- AMD64 architectures -- -#ifdef BLIS_KERNELS_ZEN2 -#include "bli_kernels_zen2.h" -#endif #ifdef BLIS_KERNELS_ZEN #include "bli_kernels_zen.h" #endif diff --git a/kernels/zen/1/bli_scalv_zen_int10.c b/kernels/zen/1/bli_scalv_zen_int10.c index 7a8ceae7b..66374c1c2 100644 --- a/kernels/zen/1/bli_scalv_zen_int10.c +++ b/kernels/zen/1/bli_scalv_zen_int10.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. + Copyright (C) 2017 - 2020, Advanced Micro Devices, Inc. All rights reserved. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without @@ -81,7 +81,7 @@ void bli_sscalv_zen_int10 if ( PASTEMAC(s,eq0)( *alpha ) ) { float* zero = bli_s0; -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC bli_ssetv_zen_int ( BLIS_NO_CONJUGATE, @@ -281,7 +281,7 @@ void bli_dscalv_zen_int10 if ( PASTEMAC(d,eq0)( *alpha ) ) { double* zero = bli_d0; -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC bli_dsetv_zen_int ( BLIS_NO_CONJUGATE, diff --git a/kernels/zen/1f/CMakeLists.txt b/kernels/zen/1f/CMakeLists.txt index d350c20b2..d3c20c76e 100644 --- a/kernels/zen/1f/CMakeLists.txt +++ b/kernels/zen/1f/CMakeLists.txt @@ -1,7 +1,8 @@ ##Copyright (C) 2020, Advanced Micro Devices, Inc.## target_sources("${PROJECT_NAME}" - PRIVATE + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_8.c ${CMAKE_CURRENT_SOURCE_DIR}/bli_dotxf_zen_int_8.c + ${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_5.c ) diff --git a/kernels/zen2/1f/bli_axpyf_zen_int_5.c b/kernels/zen/1f/bli_axpyf_zen_int_5.c similarity index 99% rename from kernels/zen2/1f/bli_axpyf_zen_int_5.c rename to kernels/zen/1f/bli_axpyf_zen_int_5.c index 5a919b622..9227b4c26 100644 --- 
a/kernels/zen2/1f/bli_axpyf_zen_int_5.c +++ b/kernels/zen/1f/bli_axpyf_zen_int_5.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -101,7 +101,7 @@ void bli_saxpyf_zen_int_5 // operation as a loop over axpyv. if ( b_n != fuse_fac ) { -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC for ( i = 0; i < b_n; ++i ) { float* a1 = a + (0 )*inca + (i )*lda; @@ -375,7 +375,7 @@ void bli_daxpyf_zen_int_5 // operation as a loop over axpyv. if ( b_n != fuse_fac ) { -#ifdef BLIS_CONFIG_ZEN2 +#ifdef BLIS_CONFIG_EPYC for ( i = 0; i < b_n; ++i ) { double* a1 = a + (0 )*inca + (i )*lda; diff --git a/kernels/zen/2/CMakeLists.txt b/kernels/zen/2/CMakeLists.txt new file mode 100644 index 000000000..480837c02 --- /dev/null +++ b/kernels/zen/2/CMakeLists.txt @@ -0,0 +1,9 @@ +##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.## + +target_sources("${PROJECT_NAME}" + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemv_zen_ref.c + ) + + + diff --git a/kernels/zen2/2/bli_gemv_zen_ref.c b/kernels/zen/2/bli_gemv_zen_ref.c similarity index 98% rename from kernels/zen2/2/bli_gemv_zen_ref.c rename to kernels/zen/2/bli_gemv_zen_ref.c index 84ac3f11d..fd36e73cd 100644 --- a/kernels/zen2/2/bli_gemv_zen_ref.c +++ b/kernels/zen/2/bli_gemv_zen_ref.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/kernels/zen/bli_kernels_zen.h b/kernels/zen/bli_kernels_zen.h index 243706c30..d9e69a06b 100644 --- a/kernels/zen/bli_kernels_zen.h +++ b/kernels/zen/bli_kernels_zen.h @@ -92,10 +92,18 @@ SETV_KER_PROT(double, d, setv_zen_int) AXPYF_KER_PROT( float, s, axpyf_zen_int_8 ) AXPYF_KER_PROT( double, d, axpyf_zen_int_8 ) +AXPYF_KER_PROT( float, s, axpyf_zen_int_5 ) +AXPYF_KER_PROT( double, d, axpyf_zen_int_5 ) + // dotxf (intrinsics) DOTXF_KER_PROT( float, s, dotxf_zen_int_8 ) DOTXF_KER_PROT( double, d, dotxf_zen_int_8 ) +// -- level-2 ---------------------------------------------------------------- + +//gemv(scalar code) +GEMV_KER_PROT( double, d, gemv_zen_ref_c ) + // -- level-3 sup -------------------------------------------------------------- // semmsup_rv diff --git a/kernels/zen2/1f/CMakeLists.txt b/kernels/zen2/1f/CMakeLists.txt deleted file mode 100644 index ea21ca644..000000000 --- a/kernels/zen2/1f/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -##Copyright (C) 2020, Advanced Micro Devices, Inc.## - -target_sources("${PROJECT_NAME}" - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_5.c - ) - - - diff --git a/kernels/zen2/2/CMakeLists.txt b/kernels/zen2/2/CMakeLists.txt deleted file mode 100644 index 83aaaf878..000000000 --- a/kernels/zen2/2/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -##Copyright (C) 2020, Advanced Micro Devices, Inc.## - -target_sources("${PROJECT_NAME}" - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/bli_gemv_zen_ref.c - ) - - - diff --git a/kernels/zen2/CMakeLists.txt b/kernels/zen2/CMakeLists.txt deleted file mode 100644 index 276de3358..000000000 --- a/kernels/zen2/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -##Copyright (C) 2020, Advanced Micro Devices, Inc.## - -target_sources("${PROJECT_NAME}" - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/bli_kernels_zen2.h - ) - -add_subdirectory(1f) 
-add_subdirectory(2) - - diff --git a/kernels/zen2/bli_kernels_zen2.h b/kernels/zen2/bli_kernels_zen2.h deleted file mode 100644 index db580a3eb..000000000 --- a/kernels/zen2/bli_kernels_zen2.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name(s) of the copyright holder(s) nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - - -// -- level-1f -- -AXPYF_KER_PROT( float, s, axpyf_zen_int_5 ) -AXPYF_KER_PROT( double, d, axpyf_zen_int_5 ) - -// -- level-2 -- - -//gemv(scalar code) -GEMV_KER_PROT( double, d, gemv_zen_ref_c ) - -