Enabling framework optimizations for zen family architectures.

Details:
- Introduced a new macro 'BLIS_CONFIG_EPYC' to enable blas and cblas
  framework optimizations for zen family configurations.
- The macro needs to be defined in family.h files of respective arch
  configs.
- Moved zen2-specific optimized kernels to zen folder, in order to be
  accessible to all zen family architectures.

Change-Id: I8da2db6b7ab22ef350a01d86c214006e812eb06d
This commit is contained in:
Meghana Vankadari
2020-10-05 17:42:45 +05:30
parent 74c9d3f36e
commit 47744663d9
35 changed files with 91 additions and 134 deletions

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Copyright (C) 2018 - 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -36,6 +36,12 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
//To enable framework optimizations for EPYC family processors.
//With this macro defined, we can call kernels directly from
//BLAS interfaces for levels 1 & 2.
//This macro needs to be defined for all EPYC configurations.
#define BLIS_CONFIG_EPYC
// By default, it is effective to parallelize the outer loops.
// Setting these macros to 1 will force JR and IR inner loops
// to be not parallelized.

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019, Advanced Micro Devices, Inc
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -36,6 +36,12 @@
#ifndef BLI_FAMILY_ZEN2_
#define BLI_FAMILY_ZEN2_
//To enable framework optimizations for EPYC family processors.
//With this macro defined, we can call kernels directly from BLAS interfaces
//for levels 1 & 2.
//This macro needs to be defined for all EPYC configurations.
#define BLIS_CONFIG_EPYC
// By default, it is effective to parallelize the outer loops.
// Setting these macros to 1 will force JR and IR inner loops
// to be not parallelized.

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -44,6 +44,11 @@
#define BLIS_THREAD_MAX_IR 1
#define BLIS_THREAD_MAX_JR 1
//To enable framework optimizations for EPYC family processors.
//With this macro defined, we can call kernels directly from BLAS interfaces
//for levels 1 & 2.
//This macro needs to be defined for all EPYC configurations.
#define BLIS_CONFIG_EPYC
#define BLIS_ENABLE_SMALL_MATRIX
#define BLIS_ENABLE_SMALL_MATRIX_TRSM

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -102,7 +102,7 @@ void PASTEMAC(ch,varname) \
} \
}
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
void bli_dgemv_unf_var1
(
trans_t transa,

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -129,7 +129,7 @@ void PASTEMAC(ch,varname) \
} \
}
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
void bli_dgemv_unf_var2
(

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -92,7 +92,7 @@ f77_int PASTEF772(i,chx,blasname) \
}
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
f77_int isamax_
(

View File

@@ -5,8 +5,8 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
@@ -85,7 +85,7 @@ void PASTEF77(ch,blasname) \
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
void saxpy_
(
const f77_int* n,

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -82,7 +82,7 @@ void PASTEF77(ch,blasname) \
}
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
void scopy_
(

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -134,7 +134,7 @@ dcomplex zdotc_
}
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
float sdot_
(

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -139,7 +139,7 @@ void PASTEF77(ch,blasname) \
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
void dgemv_
(
const f77_char* transa,

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -86,7 +86,7 @@ void PASTEF772(chx,cha,blasname) \
}
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
void sscal_
(

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -81,7 +81,7 @@ void PASTEF77(ch,blasname) \
}
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
void sswap_
(

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*/
#include "cblas.h"
#include "cblas_f77.h"
@@ -22,7 +22,7 @@ void cblas_daxpy( f77_int N, double alpha, const double *X,
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
double* y0;

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
@@ -23,7 +23,7 @@ void cblas_dcopy( f77_int N, const double *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
double* y0;

View File

@@ -7,8 +7,8 @@
* It calls the fortran wrapper before calling ddot.
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -24,7 +24,7 @@ double cblas_ddot( f77_int N, const double *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
double* y0;

View File

@@ -8,7 +8,7 @@
* Written by Keita Teranishi. 2/11/1998
*
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*/
#include "cblas.h"
#include "cblas_f77.h"
@@ -22,7 +22,7 @@ void cblas_dscal( f77_int N, double alpha, double *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
inc_t incx0;

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -23,7 +23,7 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y,
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
double* y0;

View File

@@ -7,7 +7,7 @@
* It calls the fortran wrapper before calling idamax.
*
* Written by Keita Teranishi. 2/11/1998
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -22,7 +22,7 @@ f77_int cblas_idamax( f77_int N, const double *X, f77_int incX)
#define F77_incX incX
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
inc_t incx0;

View File

@@ -7,7 +7,7 @@
* It calls the fortran wrapper before calling isamax.
*
* Written by Keita Teranishi. 2/11/1998
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -22,7 +22,7 @@ f77_int cblas_isamax( f77_int N, const float *X, f77_int incX)
#define F77_incX incX
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
inc_t incx0;

View File

@@ -8,7 +8,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*/
#include "cblas.h"
@@ -24,7 +24,7 @@ void cblas_saxpy( f77_int N, float alpha, const float *X,
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
float* y0;

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -22,7 +22,7 @@ void cblas_scopy( f77_int N, const float *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;

View File

@@ -8,7 +8,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -24,7 +24,7 @@ float cblas_sdot( f77_int N, const float *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
float* y0;

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -22,7 +22,7 @@ void cblas_sscal( f77_int N, float alpha, float *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -23,7 +23,7 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;

View File

@@ -6,7 +6,7 @@
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc.
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -250,9 +250,6 @@ CNTX_INIT_PROTS( generic )
// -- AMD64 architectures --
#ifdef BLIS_KERNELS_ZEN2
#include "bli_kernels_zen2.h"
#endif
#ifdef BLIS_KERNELS_ZEN
#include "bli_kernels_zen.h"
#endif

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc.
Copyright (C) 2017 - 2020, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
@@ -81,7 +81,7 @@ void bli_sscalv_zen_int10
if ( PASTEMAC(s,eq0)( *alpha ) )
{
float* zero = bli_s0;
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
bli_ssetv_zen_int
(
BLIS_NO_CONJUGATE,
@@ -281,7 +281,7 @@ void bli_dscalv_zen_int10
if ( PASTEMAC(d,eq0)( *alpha ) )
{
double* zero = bli_d0;
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
bli_dsetv_zen_int
(
BLIS_NO_CONJUGATE,

View File

@@ -1,7 +1,8 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
target_sources("${PROJECT_NAME}"
PRIVATE
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_8.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_dotxf_zen_int_8.c
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_5.c
)

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -101,7 +101,7 @@ void bli_saxpyf_zen_int_5
// operation as a loop over axpyv.
if ( b_n != fuse_fac )
{
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
for ( i = 0; i < b_n; ++i )
{
float* a1 = a + (0 )*inca + (i )*lda;
@@ -375,7 +375,7 @@ void bli_daxpyf_zen_int_5
// operation as a loop over axpyv.
if ( b_n != fuse_fac )
{
#ifdef BLIS_CONFIG_ZEN2
#ifdef BLIS_CONFIG_EPYC
for ( i = 0; i < b_n; ++i )
{
double* a1 = a + (0 )*inca + (i )*lda;

View File

@@ -0,0 +1,9 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.##
target_sources("${PROJECT_NAME}"
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemv_zen_ref.c
)

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -92,10 +92,18 @@ SETV_KER_PROT(double, d, setv_zen_int)
AXPYF_KER_PROT( float, s, axpyf_zen_int_8 )
AXPYF_KER_PROT( double, d, axpyf_zen_int_8 )
AXPYF_KER_PROT( float, s, axpyf_zen_int_5 )
AXPYF_KER_PROT( double, d, axpyf_zen_int_5 )
// dotxf (intrinsics)
DOTXF_KER_PROT( float, s, dotxf_zen_int_8 )
DOTXF_KER_PROT( double, d, dotxf_zen_int_8 )
// -- level-2 ----------------------------------------------------------------
//gemv(scalar code)
GEMV_KER_PROT( double, d, gemv_zen_ref_c )
// -- level-3 sup --------------------------------------------------------------
// semmsup_rv

View File

@@ -1,9 +0,0 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
target_sources("${PROJECT_NAME}"
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_5.c
)

View File

@@ -1,9 +0,0 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
target_sources("${PROJECT_NAME}"
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemv_zen_ref.c
)

View File

@@ -1,11 +0,0 @@
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
target_sources("${PROJECT_NAME}"
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/bli_kernels_zen2.h
)
add_subdirectory(1f)
add_subdirectory(2)

View File

@@ -1,46 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// -- level-1f --
AXPYF_KER_PROT( float, s, axpyf_zen_int_5 )
AXPYF_KER_PROT( double, d, axpyf_zen_int_5 )
// -- level-2 --
//gemv(scalar code)
GEMV_KER_PROT( double, d, gemv_zen_ref_c )