mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Enabling framework optimizations for zen family architectures.
Details: - Introduced a new macro 'BLIS_CONFIG_EPYC' to enable blas and cblas framework optimizations for zen family configurations. - The macro needs to be defined in family.h files of respective arch configs. - Moved zen2-specific optimized kernels to zen folder, in order to be accessible to all zen family architectures. Change-Id: I8da2db6b7ab22ef350a01d86c214006e812eb06d
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2018 - 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -36,6 +36,12 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
//To enable framework optimizations for EPYC family processors.
|
||||
//With this macro defined, we can call kernels directly from
|
||||
//BLAS interfaces for levels 1 & 2.
|
||||
//This macro needs to be defined for all EPYC configurations.
|
||||
#define BLIS_CONFIG_EPYC
|
||||
|
||||
// By default, it is effective to parallelize the outer loops.
|
||||
// Setting these macros to 1 will force JR and IR inner loops
|
||||
// to not be parallelized.
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc
|
||||
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -36,6 +36,12 @@
|
||||
#ifndef BLI_FAMILY_ZEN2_
|
||||
#define BLI_FAMILY_ZEN2_
|
||||
|
||||
//To enable framework optimizations for EPYC family processors.
|
||||
//With this macro defined, we can call kernels directly from BLAS interfaces
|
||||
//for levels 1 & 2.
|
||||
//This macro needs to be defined for all EPYC configurations.
|
||||
#define BLIS_CONFIG_EPYC
|
||||
|
||||
// By default, it is effective to parallelize the outer loops.
|
||||
// Setting these macros to 1 will force JR and IR inner loops
|
||||
// to not be parallelized.
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -44,6 +44,11 @@
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
//To enable framework optimizations for EPYC family processors.
|
||||
//With this macro defined, we can call kernels directly from BLAS interfaces
|
||||
//for levels 1 & 2.
|
||||
//This macro needs to be defined for all EPYC configurations.
|
||||
#define BLIS_CONFIG_EPYC
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -102,7 +102,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
}
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
void bli_dgemv_unf_var1
|
||||
(
|
||||
trans_t transa,
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -129,7 +129,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
}
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
void bli_dgemv_unf_var2
|
||||
(
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -92,7 +92,7 @@ f77_int PASTEF772(i,chx,blasname) \
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
f77_int isamax_
|
||||
(
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
@@ -85,7 +85,7 @@ void PASTEF77(ch,blasname) \
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
void saxpy_
|
||||
(
|
||||
const f77_int* n,
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -82,7 +82,7 @@ void PASTEF77(ch,blasname) \
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
void scopy_
|
||||
(
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -134,7 +134,7 @@ dcomplex zdotc_
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
float sdot_
|
||||
(
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -139,7 +139,7 @@ void PASTEF77(ch,blasname) \
|
||||
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
void dgemv_
|
||||
(
|
||||
const f77_char* transa,
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -86,7 +86,7 @@ void PASTEF772(chx,cha,blasname) \
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
void sscal_
|
||||
(
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -81,7 +81,7 @@ void PASTEF77(ch,blasname) \
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
void sswap_
|
||||
(
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
@@ -22,7 +22,7 @@ void cblas_daxpy( f77_int N, double alpha, const double *X,
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -23,7 +23,7 @@ void cblas_dcopy( f77_int N, const double *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
|
||||
@@ -7,8 +7,8 @@
|
||||
* It calls the fortran wrapper before calling ddot.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -24,7 +24,7 @@ double cblas_ddot( f77_int N, const double *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
@@ -22,7 +22,7 @@ void cblas_dscal( f77_int N, double alpha, double *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
inc_t incx0;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -23,7 +23,7 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y,
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
* It calls the fortran wrapper before calling idamax.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -22,7 +22,7 @@ f77_int cblas_idamax( f77_int N, const double *X, f77_int incX)
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
inc_t incx0;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
* It calls the fortran wrapper before calling isamax.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -22,7 +22,7 @@ f77_int cblas_isamax( f77_int N, const float *X, f77_int incX)
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
inc_t incx0;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
@@ -24,7 +24,7 @@ void cblas_saxpy( f77_int N, float alpha, const float *X,
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -22,7 +22,7 @@ void cblas_scopy( f77_int N, const float *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -24,7 +24,7 @@ float cblas_sdot( f77_int N, const float *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -22,7 +22,7 @@ void cblas_sscal( f77_int N, float alpha, float *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -23,7 +23,7 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
|
||||
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2019 - 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -250,9 +250,6 @@ CNTX_INIT_PROTS( generic )
|
||||
|
||||
// -- AMD64 architectures --
|
||||
|
||||
#ifdef BLIS_KERNELS_ZEN2
|
||||
#include "bli_kernels_zen2.h"
|
||||
#endif
|
||||
#ifdef BLIS_KERNELS_ZEN
|
||||
#include "bli_kernels_zen.h"
|
||||
#endif
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2017 - 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -81,7 +81,7 @@ void bli_sscalv_zen_int10
|
||||
if ( PASTEMAC(s,eq0)( *alpha ) )
|
||||
{
|
||||
float* zero = bli_s0;
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
bli_ssetv_zen_int
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
@@ -281,7 +281,7 @@ void bli_dscalv_zen_int10
|
||||
if ( PASTEMAC(d,eq0)( *alpha ) )
|
||||
{
|
||||
double* zero = bli_d0;
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
bli_dsetv_zen_int
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
|
||||
|
||||
target_sources("${PROJECT_NAME}"
|
||||
PRIVATE
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_8.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_dotxf_zen_int_8.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_5.c
|
||||
)
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -101,7 +101,7 @@ void bli_saxpyf_zen_int_5
|
||||
// operation as a loop over axpyv.
|
||||
if ( b_n != fuse_fac )
|
||||
{
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
for ( i = 0; i < b_n; ++i )
|
||||
{
|
||||
float* a1 = a + (0 )*inca + (i )*lda;
|
||||
@@ -375,7 +375,7 @@ void bli_daxpyf_zen_int_5
|
||||
// operation as a loop over axpyv.
|
||||
if ( b_n != fuse_fac )
|
||||
{
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
for ( i = 0; i < b_n; ++i )
|
||||
{
|
||||
double* a1 = a + (0 )*inca + (i )*lda;
|
||||
9
kernels/zen/2/CMakeLists.txt
Normal file
9
kernels/zen/2/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
##Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.##
|
||||
|
||||
target_sources("${PROJECT_NAME}"
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemv_zen_ref.c
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -92,10 +92,18 @@ SETV_KER_PROT(double, d, setv_zen_int)
|
||||
AXPYF_KER_PROT( float, s, axpyf_zen_int_8 )
|
||||
AXPYF_KER_PROT( double, d, axpyf_zen_int_8 )
|
||||
|
||||
AXPYF_KER_PROT( float, s, axpyf_zen_int_5 )
|
||||
AXPYF_KER_PROT( double, d, axpyf_zen_int_5 )
|
||||
|
||||
// dotxf (intrinsics)
|
||||
DOTXF_KER_PROT( float, s, dotxf_zen_int_8 )
|
||||
DOTXF_KER_PROT( double, d, dotxf_zen_int_8 )
|
||||
|
||||
// -- level-2 ----------------------------------------------------------------
|
||||
|
||||
//gemv(scalar code)
|
||||
GEMV_KER_PROT( double, d, gemv_zen_ref_c )
|
||||
|
||||
// -- level-3 sup --------------------------------------------------------------
|
||||
// semmsup_rv
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
|
||||
|
||||
target_sources("${PROJECT_NAME}"
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_axpyf_zen_int_5.c
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
|
||||
|
||||
target_sources("${PROJECT_NAME}"
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_gemv_zen_ref.c
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
##Copyright (C) 2020, Advanced Micro Devices, Inc.##
|
||||
|
||||
target_sources("${PROJECT_NAME}"
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_kernels_zen2.h
|
||||
)
|
||||
|
||||
add_subdirectory(1f)
|
||||
add_subdirectory(2)
|
||||
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
// -- level-1f --
|
||||
AXPYF_KER_PROT( float, s, axpyf_zen_int_5 )
|
||||
AXPYF_KER_PROT( double, d, axpyf_zen_int_5 )
|
||||
|
||||
// -- level-2 --
|
||||
|
||||
//gemv(scalar code)
|
||||
GEMV_KER_PROT( double, d, gemv_zen_ref_c )
|
||||
|
||||
|
||||
Reference in New Issue
Block a user