Add blis_impl wrappers for matrix copy and related APIs

The BLAS and BLIS extension APIs for copying and transposing matrices
currently have only one interface option. This patch adds a blis_impl
layer and compiles the top-level interface only when BLIS_ENABLE_BLAS
is defined, as is already done for the standard BLAS interfaces.

Change-Id: I1b6c668e8492305b16e8735b9ed83bea3c0d3b6c
This commit is contained in:
Edward Smyth
2025-03-31 09:07:36 -04:00
parent 81d219e3f8
commit e0b86c69af
8 changed files with 607 additions and 57 deletions

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -156,7 +156,7 @@ static dim_t bli_ziMatCopy_cc
dim_t ldb
);
void simatcopy_
void simatcopy_blis_impl
(
f77_char* trans,
f77_int* rows,
@@ -226,8 +226,23 @@ void simatcopy_
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level simatcopy_ entry point (float alpha / float matrix pointer).
// Exists only when BLIS_ENABLE_BLAS is defined. It does no work of its own:
// all seven arguments are handed, unchanged and in the same order, to
// simatcopy_blis_impl.
void simatcopy_
(
f77_char* trans,
f77_int* rows,
f77_int* cols,
const float* alpha,
float* aptr,
f77_int* lda,
f77_int* ldb
)
{
simatcopy_blis_impl(trans,rows,cols,alpha,aptr,lda,ldb);
}
#endif
void dimatcopy_
void dimatcopy_blis_impl
(
f77_char* trans,
f77_int* rows,
@@ -296,8 +311,23 @@ void dimatcopy_
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level dimatcopy_ entry point (double alpha / double matrix pointer).
// Exists only when BLIS_ENABLE_BLAS is defined. It does no work of its own:
// all seven arguments are handed, unchanged and in the same order, to
// dimatcopy_blis_impl.
void dimatcopy_
(
f77_char* trans,
f77_int* rows,
f77_int* cols,
const double* alpha,
double* aptr,
f77_int* lda,
f77_int* ldb
)
{
dimatcopy_blis_impl(trans,rows,cols,alpha,aptr,lda,ldb);
}
#endif
void cimatcopy_
void cimatcopy_blis_impl
(
f77_char* trans,
f77_int* rows,
@@ -366,8 +396,23 @@ void cimatcopy_
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level cimatcopy_ entry point (scomplex alpha / scomplex matrix pointer).
// Exists only when BLIS_ENABLE_BLAS is defined. It does no work of its own:
// all seven arguments are handed, unchanged and in the same order, to
// cimatcopy_blis_impl.
void cimatcopy_
(
f77_char* trans,
f77_int* rows,
f77_int* cols,
const scomplex* alpha,
scomplex* aptr,
f77_int* lda,
f77_int* ldb
)
{
cimatcopy_blis_impl(trans,rows,cols,alpha,aptr,lda,ldb);
}
#endif
void zimatcopy_
void zimatcopy_blis_impl
(
f77_char* trans,
f77_int* rows,
@@ -436,6 +481,21 @@ void zimatcopy_
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level zimatcopy_ entry point (dcomplex alpha / dcomplex matrix pointer).
// Exists only when BLIS_ENABLE_BLAS is defined. It does no work of its own:
// all seven arguments are handed, unchanged and in the same order, to
// zimatcopy_blis_impl.
void zimatcopy_
(
f77_char* trans,
f77_int* rows,
f77_int* cols,
const dcomplex* alpha,
dcomplex* aptr,
f77_int* lda,
f77_int* ldb
)
{
zimatcopy_blis_impl(trans,rows,cols,alpha,aptr,lda,ldb);
}
#endif
// suffix cn means - column major & non-trans
static dim_t bli_siMatCopy_cn(dim_t rows,dim_t cols,const float alpha,float* a,dim_t lda,dim_t ldb)

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -96,7 +96,7 @@ static void bli_zconjugate(dcomplex* A,dim_t cols,dim_t rows)
A[i].imag *=(-1);
}
void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
void somatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
@@ -168,8 +168,14 @@ void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level somatadd_ entry point (float operands). Present only when
// BLIS_ENABLE_BLAS is defined; forwards all twelve arguments unchanged to
// somatadd_blis_impl.
void somatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
{
somatadd_blis_impl (transa,transb,m,n,alpha,A,lda,beta,B,ldb,C,ldc);
}
#endif
void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
void domatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
@@ -240,8 +246,14 @@ void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level domatadd_ entry point (double operands). Present only when
// BLIS_ENABLE_BLAS is defined; forwards all twelve arguments unchanged to
// domatadd_blis_impl.
void domatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
{
domatadd_blis_impl (transa,transb,m,n,alpha,A,lda,beta,B,ldb,C,ldc);
}
#endif
void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
void comatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
@@ -325,8 +337,14 @@ void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level comatadd_ entry point (scomplex operands). Present only when
// BLIS_ENABLE_BLAS is defined; forwards all twelve arguments unchanged to
// comatadd_blis_impl.
// NOTE(review): B is declared `scomplex*` (non-const) here, whereas the
// float and double variants take B as a const pointer - confirm whether the
// difference is intentional before changing either side.
void comatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
{
comatadd_blis_impl (transa,transb,m,n,alpha,A,lda,beta,B,ldb,C,ldc);
}
#endif
void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda,const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc)
void zomatadd_blis_impl (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda,const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
@@ -412,6 +430,12 @@ void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level zomatadd_ entry point (dcomplex operands). Present only when
// BLIS_ENABLE_BLAS is defined; forwards all twelve arguments unchanged to
// zomatadd_blis_impl.
// NOTE(review): B is declared `dcomplex*` (non-const) here, whereas the
// float and double variants take B as a const pointer - confirm whether the
// difference is intentional before changing either side.
void zomatadd_ (f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda,const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc)
{
zomatadd_blis_impl (transa,transb,m,n,alpha,A,lda,beta,B,ldb,C,ldc);
}
#endif
static dim_t bli_soMatAdd_cn(dim_t rows,dim_t cols,const float alpha,float* aptr,dim_t lda,const float beta,float* bptr,dim_t ldb,float* C,dim_t ldc)
{

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -60,7 +60,7 @@ static dim_t bli_zoMatCopy_cr(dim_t rows, dim_t cols, const dcomplex alpha, cons
static dim_t bli_zoMatCopy_cc(dim_t rows, dim_t cols, const dcomplex alpha, const dcomplex* a, dim_t lda, dcomplex* b, dim_t ldb);
void somatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb)
void somatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
if ( !(*trans == 'n' || *trans == 'N' ||
@@ -101,8 +101,14 @@ void somatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alp
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level somatcopy_ entry point (float data; aptr is const, bptr is
// writable). Present only when BLIS_ENABLE_BLAS is defined; forwards all
// eight arguments unchanged to somatcopy_blis_impl.
void somatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb)
{
somatcopy_blis_impl (trans,rows,cols,alpha,aptr,lda,bptr,ldb);
}
#endif
void domatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
void domatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
@@ -144,8 +150,14 @@ void domatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* al
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level domatcopy_ entry point (double data; aptr is const, bptr is
// writable). Present only when BLIS_ENABLE_BLAS is defined; forwards all
// eight arguments unchanged to domatcopy_blis_impl.
void domatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
{
domatcopy_blis_impl (trans,rows,cols,alpha,aptr,lda,bptr,ldb);
}
#endif
void comatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
void comatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
@@ -188,8 +200,14 @@ void comatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex*
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level comatcopy_ entry point (scomplex data; aptr is const, bptr is
// writable). Present only when BLIS_ENABLE_BLAS is defined; forwards all
// eight arguments unchanged to comatcopy_blis_impl.
void comatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
{
comatcopy_blis_impl (trans,rows,cols,alpha,aptr,lda,bptr,ldb);
}
#endif
void zomatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb)
void zomatcopy_blis_impl (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb)
{
AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
//bli_init_once();
@@ -231,6 +249,12 @@ void zomatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex*
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level zomatcopy_ entry point (dcomplex data; aptr is const, bptr is
// writable). Present only when BLIS_ENABLE_BLAS is defined; forwards all
// eight arguments unchanged to zomatcopy_blis_impl.
void zomatcopy_ (f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb)
{
zomatcopy_blis_impl (trans,rows,cols,alpha,aptr,lda,bptr,ldb);
}
#endif
// suffix cn means - column major & non-trans
static dim_t bli_soMatCopy_cn(dim_t rows, dim_t cols, const float alpha, const float* a, dim_t lda, float* b, dim_t ldb)

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -192,7 +192,7 @@ static dim_t bli_zoMatCopy2_cc
dim_t strideb
);
void somatcopy2_
void somatcopy2_blis_impl
(
f77_char* trans,
f77_int* rows,
@@ -264,8 +264,27 @@ void somatcopy2_
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level somatcopy2_ entry point (float data). Compared with somatcopy_
// the signature carries two extra arguments, stridea and strideb. Present
// only when BLIS_ENABLE_BLAS is defined; forwards all ten arguments
// unchanged to somatcopy2_blis_impl.
void somatcopy2_
(
f77_char* trans,
f77_int* rows,
f77_int* cols,
const float* alpha,
const float* aptr,
f77_int* lda,
f77_int* stridea,
float* bptr,
f77_int* ldb,
f77_int* strideb
)
{
somatcopy2_blis_impl(trans,rows,cols,alpha,aptr,lda,stridea,bptr,ldb,
strideb);
}
#endif
void domatcopy2_
void domatcopy2_blis_impl
(
f77_char* trans,
f77_int* rows,
@@ -337,8 +356,27 @@ void domatcopy2_
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level domatcopy2_ entry point (double data). Compared with domatcopy_
// the signature carries two extra arguments, stridea and strideb. Present
// only when BLIS_ENABLE_BLAS is defined; forwards all ten arguments
// unchanged to domatcopy2_blis_impl.
void domatcopy2_
(
f77_char* trans,
f77_int* rows,
f77_int* cols,
const double* alpha,
const double* aptr,
f77_int* lda,
f77_int* stridea,
double* bptr,
f77_int* ldb,
f77_int* strideb
)
{
domatcopy2_blis_impl(trans,rows,cols,alpha,aptr,lda,stridea,bptr,ldb,
strideb);
}
#endif
void comatcopy2_
void comatcopy2_blis_impl
(
f77_char* trans,
f77_int* rows,
@@ -410,8 +448,27 @@ void comatcopy2_
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level comatcopy2_ entry point (scomplex data). Compared with
// comatcopy_ the signature carries two extra arguments, stridea and strideb.
// Present only when BLIS_ENABLE_BLAS is defined; forwards all ten arguments
// unchanged to comatcopy2_blis_impl.
void comatcopy2_
(
f77_char* trans,
f77_int* rows,
f77_int* cols,
const scomplex* alpha,
const scomplex* aptr,
f77_int* lda,
f77_int* stridea,
scomplex* bptr,
f77_int* ldb,
f77_int* strideb
)
{
comatcopy2_blis_impl(trans,rows,cols,alpha,aptr,lda,stridea,bptr,ldb,
strideb);
}
#endif
void zomatcopy2_
void zomatcopy2_blis_impl
(
f77_char* trans,
f77_int* rows,
@@ -483,6 +540,25 @@ void zomatcopy2_
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return ;
}
#ifdef BLIS_ENABLE_BLAS
// Top-level zomatcopy2_ entry point (dcomplex data). Compared with
// zomatcopy_ the signature carries two extra arguments, stridea and strideb.
// Present only when BLIS_ENABLE_BLAS is defined; forwards all ten arguments
// unchanged to zomatcopy2_blis_impl.
void zomatcopy2_
(
f77_char* trans,
f77_int* rows,
f77_int* cols,
const dcomplex* alpha,
const dcomplex* aptr,
f77_int* lda,
f77_int* stridea,
dcomplex* bptr,
f77_int* ldb,
f77_int* strideb
)
{
zomatcopy2_blis_impl(trans,rows,cols,alpha,aptr,lda,stridea,bptr,ldb,
strideb);
}
#endif
// suffix cn means - column major & non-trans
static dim_t bli_soMatCopy2_cn

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2021 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2021 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -2443,19 +2443,17 @@ void CGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* tra
cgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
//#ifdef BLIS_ENABLE_CBLAS
void CIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha,scomplex* aptr, f77_int* lda, f77_int* ldb)
void CIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb)
{
cimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
void cimatcopy(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha,scomplex* aptr, f77_int* lda, f77_int* ldb)
void cimatcopy(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb)
{
cimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
void CIMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha,scomplex* aptr, f77_int* lda, f77_int* ldb)
void CIMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb)
{
cimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
@@ -2505,8 +2503,6 @@ void COMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* a
comatcopy_( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
//#endif // BLIS_ENABLE_CBLAS
#ifdef BLIS_ENABLE_CBLAS
void DASUMSUB(const f77_int* n, const double* x, const f77_int* incx, double* rval)
@@ -2644,6 +2640,21 @@ void DGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* tra
dgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
// Alternate spellings of dimatcopy_: upper-case, lower-case without the
// trailing underscore, and upper-case with the trailing underscore. Each one
// forwards its seven arguments unchanged to dimatcopy_.
// NOTE(review): presumably these cover the symbol-name conventions of
// different Fortran compilers - confirm against the build documentation.
void DIMATCOPY( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb)
{
dimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
void dimatcopy( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb)
{
dimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
void DIMATCOPY_( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb)
{
dimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
#ifdef BLIS_ENABLE_CBLAS
void DNRM2SUB(const f77_int* n, const double* x, const f77_int* incx, double *rval)
@@ -2663,8 +2674,6 @@ void DNRM2SUB_(const f77_int* n, const double* x, const f77_int* incx, double *r
#endif // BLIS_ENABLE_CBLAS
//#ifdef BLIS_ENABLE_CBLAS
void DOMATADD(f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
{
domatadd_( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
@@ -2710,8 +2719,6 @@ void DOMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alp
domatcopy_( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
//#endif // BLIS_ENABLE_CBLAS
#ifdef BLIS_ENABLE_CBLAS
void DZASUMSUB(const f77_int* n, const dcomplex* x, const f77_int* incx, double* rval)
@@ -3105,25 +3112,21 @@ void SGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* tra
sgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
//#ifdef BLIS_ENABLE_CBLAS
void SIMATCOPY( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha,float* aptr, f77_int* lda, f77_int* ldb)
void SIMATCOPY( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb)
{
simatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
void simatcopy( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha,float* aptr, f77_int* lda, f77_int* ldb)
void simatcopy( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb)
{
simatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
void SIMATCOPY_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha,float* aptr, f77_int* lda, f77_int* ldb)
void SIMATCOPY_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb)
{
simatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
//#endif // BLIS_ENABLE_CBLAS
#ifdef BLIS_ENABLE_CBLAS
void SNRM2SUB( const f77_int* n, const float* x, const f77_int* incx, float *rval)
@@ -3143,8 +3146,6 @@ void SNRM2SUB_( const f77_int* n, const float* x, const f77_int* incx, float *rv
#endif // BLIS_ENABLE_CBLAS
//#ifdef BLIS_ENABLE_CBLAS
void SOMATADD( f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
{
somatadd_( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
@@ -3190,8 +3191,6 @@ void SOMATCOPY_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alp
somatcopy_( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
//#endif // BLIS_ENABLE_CBLAS
void ZAXPBY( const f77_int* n, const dcomplex* alpha, const dcomplex *x, const f77_int* incx, const dcomplex* beta, dcomplex *y, const f77_int* incy)
{
zaxpby_blis_impl( n, alpha, x, incx, beta, y, incy);
@@ -3286,19 +3285,17 @@ void ZGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* tra
zgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
//#ifdef BLIS_ENABLE_CBLAS
void ZIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha,dcomplex* aptr, f77_int* lda, f77_int* ldb)
void ZIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, dcomplex* aptr, f77_int* lda, f77_int* ldb)
{
zimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
void zimatcopy(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha,dcomplex* aptr, f77_int* lda, f77_int* ldb)
void zimatcopy(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, dcomplex* aptr, f77_int* lda, f77_int* ldb)
{
zimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
void ZIMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha,dcomplex* aptr, f77_int* lda, f77_int* ldb)
void ZIMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, dcomplex* aptr, f77_int* lda, f77_int* ldb)
{
zimatcopy_( trans, rows, cols, alpha, aptr, lda, ldb);
}
@@ -3348,8 +3345,6 @@ void ZOMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* a
zomatcopy_( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
//#endif // BLIS_ENABLE_CBLAS
float SCABS1(bla_scomplex* z)
{
return scabs1_blis_impl( z);

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -1674,8 +1674,6 @@ BLIS_EXPORT_BLIS void zgemmt( const f77_char* uploc, const f77_char* transa, c
BLIS_EXPORT_BLIS void ZGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc);
//#ifdef BLIS_ENABLE_CBLAS
BLIS_EXPORT_BLIS void CIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void cimatcopy(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb);
@@ -1708,6 +1706,14 @@ BLIS_EXPORT_BLIS void COMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols
BLIS_EXPORT_BLIS void DIMATCOPY( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void dimatcopy( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void DIMATCOPY_( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void DOMATADD(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc);
BLIS_EXPORT_BLIS void domatadd(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc);
@@ -1794,8 +1800,6 @@ BLIS_EXPORT_BLIS void zomatcopy(f77_char* trans, f77_int* rows, f77_int* cols,
BLIS_EXPORT_BLIS void ZOMATCOPY_(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb);
//#endif // BLIS_ENABLE_CBLAS
#endif
#endif

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2021 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2021 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -2441,6 +2441,66 @@ void CGEMMT_BLIS_IMPL_( const f77_char* uploc, const f77_char* transa, const f77
cgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
// Alternate spellings of cimatcopy_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its seven arguments unchanged to cimatcopy_blis_impl.
void CIMATCOPY_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb)
{
cimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
void cimatcopy_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb)
{
cimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
void CIMATCOPY_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb)
{
cimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
// Alternate spellings of comatadd_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its twelve arguments unchanged to comatadd_blis_impl.
void COMATADD_BLIS_IMPL(f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
{
comatadd_blis_impl( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
}
void comatadd_blis_impl_(f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
{
comatadd_blis_impl( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
}
void COMATADD_BLIS_IMPL_(f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda,const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc)
{
comatadd_blis_impl( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
}
// Alternate spellings of comatcopy2_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its ten arguments unchanged to comatcopy2_blis_impl.
void COMATCOPY2_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda,f77_int* stridea, scomplex* bptr, f77_int* ldb,f77_int* strideb)
{
comatcopy2_blis_impl( trans, rows, cols, alpha, aptr, lda, stridea, bptr, ldb, strideb);
}
void comatcopy2_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda,f77_int* stridea, scomplex* bptr, f77_int* ldb,f77_int* strideb)
{
comatcopy2_blis_impl( trans, rows, cols, alpha, aptr, lda, stridea, bptr, ldb, strideb);
}
void COMATCOPY2_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda,f77_int* stridea, scomplex* bptr, f77_int* ldb,f77_int* strideb)
{
comatcopy2_blis_impl( trans, rows, cols, alpha, aptr, lda, stridea, bptr, ldb, strideb);
}
// Alternate spellings of comatcopy_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its eight arguments unchanged to comatcopy_blis_impl.
void COMATCOPY_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
{
comatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
void comatcopy_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
{
comatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
void COMATCOPY_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb)
{
comatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
#ifdef BLIS_ENABLE_CBLAS
void DASUMSUB_BLIS_IMPL(const f77_int* n, const double* x, const f77_int* incx, double* rval)
@@ -2578,6 +2638,21 @@ void DGEMMT_BLIS_IMPL_( const f77_char* uploc, const f77_char* transa, const f77
dgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
// Alternate spellings of dimatcopy_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its seven arguments unchanged to dimatcopy_blis_impl.
void DIMATCOPY_BLIS_IMPL( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb)
{
dimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
void dimatcopy_blis_impl_( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb)
{
dimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
void DIMATCOPY_BLIS_IMPL_( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb)
{
dimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
#ifdef BLIS_ENABLE_CBLAS
void DNRM2SUB_BLIS_IMPL(const f77_int* n, const double* x, const f77_int* incx, double *rval)
@@ -2597,6 +2672,51 @@ void DNRM2SUB_BLIS_IMPL_(const f77_int* n, const double* x, const f77_int* incx,
#endif // BLIS_ENABLE_CBLAS
// Alternate spellings of domatadd_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its twelve arguments unchanged to domatadd_blis_impl.
void DOMATADD_BLIS_IMPL(f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
{
domatadd_blis_impl( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
}
void domatadd_blis_impl_(f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
{
domatadd_blis_impl( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
}
void DOMATADD_BLIS_IMPL_(f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc)
{
domatadd_blis_impl( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
}
// Alternate spellings of domatcopy2_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its ten arguments unchanged to domatcopy2_blis_impl.
void DOMATCOPY2_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda,f77_int* stridea, double* bptr, f77_int* ldb,f77_int* strideb)
{
domatcopy2_blis_impl( trans, rows, cols, alpha, aptr, lda, stridea, bptr, ldb, strideb);
}
void domatcopy2_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda,f77_int* stridea, double* bptr, f77_int* ldb,f77_int* strideb)
{
domatcopy2_blis_impl( trans, rows, cols, alpha, aptr, lda, stridea, bptr, ldb, strideb);
}
void DOMATCOPY2_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda,f77_int* stridea, double* bptr, f77_int* ldb,f77_int* strideb)
{
domatcopy2_blis_impl( trans, rows, cols, alpha, aptr, lda, stridea, bptr, ldb, strideb);
}
// Alternate spellings of domatcopy_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its eight arguments unchanged to domatcopy_blis_impl.
void DOMATCOPY_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
{
domatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
void domatcopy_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
{
domatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
void DOMATCOPY_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb)
{
domatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb);
}
#ifdef BLIS_ENABLE_CBLAS
void DZASUMSUB_BLIS_IMPL(const f77_int* n, const dcomplex* x, const f77_int* incx, double* rval)
@@ -2990,6 +3110,21 @@ void SGEMMT_BLIS_IMPL_( const f77_char* uploc, const f77_char* transa, const f77
sgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
// Alternate spellings of simatcopy_blis_impl: upper-case, lower-case with a
// trailing underscore, and upper-case with a trailing underscore. Each one
// forwards its seven arguments unchanged to simatcopy_blis_impl.
void SIMATCOPY_BLIS_IMPL( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb)
{
simatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
void simatcopy_blis_impl_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb)
{
simatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
void SIMATCOPY_BLIS_IMPL_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb)
{
simatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb);
}
#ifdef BLIS_ENABLE_CBLAS
void SNRM2SUB_BLIS_IMPL( const f77_int* n, const float* x, const f77_int* incx, float *rval)
@@ -3009,6 +3144,51 @@ void SNRM2SUB_BLIS_IMPL_( const f77_int* n, const float* x, const f77_int* incx,
#endif // BLIS_ENABLE_CBLAS
void SOMATADD_BLIS_IMPL( f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
{
somatadd_blis_impl( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
}
void somatadd_blis_impl_( f77_char* transa,f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc)
{
somatadd_blis_impl( transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
}
/*
 * SOMATADD_BLIS_IMPL_ -- upper-case, trailing-underscore symbol for the
 * float variant of omatadd. Forwards all twelve arguments, in the order
 * received, to somatadd_blis_impl().
 */
void SOMATADD_BLIS_IMPL_
     (
       f77_char*    transa,
       f77_char*    transb,
       f77_int*     m,
       f77_int*     n,
       const float* alpha,
       const float* A,
       f77_int*     lda,
       const float* beta,
       const float* B,
       f77_int*     ldb,
       float*       C,
       f77_int*     ldc
     )
{
    somatadd_blis_impl( transa, transb, m, n,
                        alpha, A, lda,
                        beta,  B, ldb,
                        C, ldc );
}
/*
 * SOMATCOPY2_BLIS_IMPL -- upper-case symbol for the float variant of
 * omatcopy2. Forwards all ten arguments (including the stridea/strideb
 * pair) unchanged to somatcopy2_blis_impl().
 */
void SOMATCOPY2_BLIS_IMPL
     (
       f77_char*    trans,
       f77_int*     rows,
       f77_int*     cols,
       const float* alpha,
       const float* aptr,
       f77_int*     lda,
       f77_int*     stridea,
       float*       bptr,
       f77_int*     ldb,
       f77_int*     strideb
     )
{
    somatcopy2_blis_impl( trans, rows, cols, alpha,
                          aptr, lda, stridea,
                          bptr, ldb, strideb );
}
/*
 * somatcopy2_blis_impl_ -- lower-case, trailing-underscore symbol for the
 * float variant of omatcopy2. Forwards all ten arguments (including the
 * stridea/strideb pair) unchanged to somatcopy2_blis_impl().
 */
void somatcopy2_blis_impl_
     (
       f77_char*    trans,
       f77_int*     rows,
       f77_int*     cols,
       const float* alpha,
       const float* aptr,
       f77_int*     lda,
       f77_int*     stridea,
       float*       bptr,
       f77_int*     ldb,
       f77_int*     strideb
     )
{
    somatcopy2_blis_impl( trans, rows, cols, alpha,
                          aptr, lda, stridea,
                          bptr, ldb, strideb );
}
/*
 * SOMATCOPY2_BLIS_IMPL_ -- upper-case, trailing-underscore symbol for the
 * float variant of omatcopy2. Forwards all ten arguments (including the
 * stridea/strideb pair) unchanged to somatcopy2_blis_impl().
 */
void SOMATCOPY2_BLIS_IMPL_
     (
       f77_char*    trans,
       f77_int*     rows,
       f77_int*     cols,
       const float* alpha,
       const float* aptr,
       f77_int*     lda,
       f77_int*     stridea,
       float*       bptr,
       f77_int*     ldb,
       f77_int*     strideb
     )
{
    somatcopy2_blis_impl( trans, rows, cols, alpha,
                          aptr, lda, stridea,
                          bptr, ldb, strideb );
}
/*
 * SOMATCOPY_BLIS_IMPL -- upper-case symbol for the float variant of
 * omatcopy. Forwards all eight arguments unchanged to
 * somatcopy_blis_impl().
 */
void SOMATCOPY_BLIS_IMPL
     (
       f77_char*    trans,
       f77_int*     rows,
       f77_int*     cols,
       const float* alpha,
       const float* aptr,
       f77_int*     lda,
       float*       bptr,
       f77_int*     ldb
     )
{
    somatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb );
}
/*
 * somatcopy_blis_impl_ -- lower-case, trailing-underscore symbol for the
 * float variant of omatcopy. Forwards all eight arguments unchanged to
 * somatcopy_blis_impl().
 */
void somatcopy_blis_impl_
     (
       f77_char*    trans,
       f77_int*     rows,
       f77_int*     cols,
       const float* alpha,
       const float* aptr,
       f77_int*     lda,
       float*       bptr,
       f77_int*     ldb
     )
{
    somatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb );
}
/*
 * SOMATCOPY_BLIS_IMPL_ -- upper-case, trailing-underscore symbol for the
 * float variant of omatcopy. Forwards all eight arguments unchanged to
 * somatcopy_blis_impl().
 */
void SOMATCOPY_BLIS_IMPL_
     (
       f77_char*    trans,
       f77_int*     rows,
       f77_int*     cols,
       const float* alpha,
       const float* aptr,
       f77_int*     lda,
       float*       bptr,
       f77_int*     ldb
     )
{
    somatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb );
}
void ZAXPBY_BLIS_IMPL( const f77_int* n, const dcomplex* alpha, const dcomplex *x, const f77_int* incx, const dcomplex* beta, dcomplex *y, const f77_int* incy)
{
zaxpby_blis_impl( n, alpha, x, incx, beta, y, incy);
@@ -3103,6 +3283,66 @@ void ZGEMMT_BLIS_IMPL_( const f77_char* uploc, const f77_char* transa, const f77
zgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
/*
 * ZIMATCOPY_BLIS_IMPL -- upper-case symbol for the dcomplex variant of
 * imatcopy. Pure pass-through: every argument is forwarded unchanged to
 * zimatcopy_blis_impl().
 */
void ZIMATCOPY_BLIS_IMPL
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       dcomplex*       aptr,
       f77_int*        lda,
       f77_int*        ldb
     )
{
    zimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb );
}
/*
 * zimatcopy_blis_impl_ -- lower-case, trailing-underscore symbol for the
 * dcomplex variant of imatcopy. Pure pass-through: every argument is
 * forwarded unchanged to zimatcopy_blis_impl().
 */
void zimatcopy_blis_impl_
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       dcomplex*       aptr,
       f77_int*        lda,
       f77_int*        ldb
     )
{
    zimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb );
}
/*
 * ZIMATCOPY_BLIS_IMPL_ -- upper-case, trailing-underscore symbol for the
 * dcomplex variant of imatcopy. Pure pass-through: every argument is
 * forwarded unchanged to zimatcopy_blis_impl().
 */
void ZIMATCOPY_BLIS_IMPL_
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       dcomplex*       aptr,
       f77_int*        lda,
       f77_int*        ldb
     )
{
    zimatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, ldb );
}
/*
 * ZOMATADD_BLIS_IMPL -- upper-case symbol for the dcomplex variant of
 * omatadd. Forwards all twelve arguments, in the order received, to
 * zomatadd_blis_impl(). Note that B is taken as a non-const pointer here.
 */
void ZOMATADD_BLIS_IMPL
     (
       f77_char*       transa,
       f77_char*       transb,
       f77_int*        m,
       f77_int*        n,
       const dcomplex* alpha,
       const dcomplex* A,
       f77_int*        lda,
       const dcomplex* beta,
       dcomplex*       B,
       f77_int*        ldb,
       dcomplex*       C,
       f77_int*        ldc
     )
{
    zomatadd_blis_impl( transa, transb, m, n,
                        alpha, A, lda,
                        beta,  B, ldb,
                        C, ldc );
}
/*
 * zomatadd_blis_impl_ -- lower-case, trailing-underscore symbol for the
 * dcomplex variant of omatadd. Forwards all twelve arguments, in the order
 * received, to zomatadd_blis_impl(). Note that B is taken as a non-const
 * pointer here.
 */
void zomatadd_blis_impl_
     (
       f77_char*       transa,
       f77_char*       transb,
       f77_int*        m,
       f77_int*        n,
       const dcomplex* alpha,
       const dcomplex* A,
       f77_int*        lda,
       const dcomplex* beta,
       dcomplex*       B,
       f77_int*        ldb,
       dcomplex*       C,
       f77_int*        ldc
     )
{
    zomatadd_blis_impl( transa, transb, m, n,
                        alpha, A, lda,
                        beta,  B, ldb,
                        C, ldc );
}
/*
 * ZOMATADD_BLIS_IMPL_ -- upper-case, trailing-underscore symbol for the
 * dcomplex variant of omatadd. Forwards all twelve arguments, in the order
 * received, to zomatadd_blis_impl(). Note that B is taken as a non-const
 * pointer here.
 */
void ZOMATADD_BLIS_IMPL_
     (
       f77_char*       transa,
       f77_char*       transb,
       f77_int*        m,
       f77_int*        n,
       const dcomplex* alpha,
       const dcomplex* A,
       f77_int*        lda,
       const dcomplex* beta,
       dcomplex*       B,
       f77_int*        ldb,
       dcomplex*       C,
       f77_int*        ldc
     )
{
    zomatadd_blis_impl( transa, transb, m, n,
                        alpha, A, lda,
                        beta,  B, ldb,
                        C, ldc );
}
/*
 * ZOMATCOPY2_BLIS_IMPL -- upper-case symbol for the dcomplex variant of
 * omatcopy2. Forwards all ten arguments (including the stridea/strideb
 * pair) unchanged to zomatcopy2_blis_impl().
 */
void ZOMATCOPY2_BLIS_IMPL
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       const dcomplex* aptr,
       f77_int*        lda,
       f77_int*        stridea,
       dcomplex*       bptr,
       f77_int*        ldb,
       f77_int*        strideb
     )
{
    zomatcopy2_blis_impl( trans, rows, cols, alpha,
                          aptr, lda, stridea,
                          bptr, ldb, strideb );
}
/*
 * zomatcopy2_blis_impl_ -- lower-case, trailing-underscore symbol for the
 * dcomplex variant of omatcopy2. Forwards all ten arguments (including the
 * stridea/strideb pair) unchanged to zomatcopy2_blis_impl().
 */
void zomatcopy2_blis_impl_
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       const dcomplex* aptr,
       f77_int*        lda,
       f77_int*        stridea,
       dcomplex*       bptr,
       f77_int*        ldb,
       f77_int*        strideb
     )
{
    zomatcopy2_blis_impl( trans, rows, cols, alpha,
                          aptr, lda, stridea,
                          bptr, ldb, strideb );
}
/*
 * ZOMATCOPY2_BLIS_IMPL_ -- upper-case, trailing-underscore symbol for the
 * dcomplex variant of omatcopy2. Forwards all ten arguments (including the
 * stridea/strideb pair) unchanged to zomatcopy2_blis_impl().
 */
void ZOMATCOPY2_BLIS_IMPL_
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       const dcomplex* aptr,
       f77_int*        lda,
       f77_int*        stridea,
       dcomplex*       bptr,
       f77_int*        ldb,
       f77_int*        strideb
     )
{
    zomatcopy2_blis_impl( trans, rows, cols, alpha,
                          aptr, lda, stridea,
                          bptr, ldb, strideb );
}
/*
 * ZOMATCOPY_BLIS_IMPL -- upper-case symbol for the dcomplex variant of
 * omatcopy. Forwards all eight arguments unchanged to
 * zomatcopy_blis_impl().
 */
void ZOMATCOPY_BLIS_IMPL
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       const dcomplex* aptr,
       f77_int*        lda,
       dcomplex*       bptr,
       f77_int*        ldb
     )
{
    zomatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb );
}
/*
 * zomatcopy_blis_impl_ -- lower-case, trailing-underscore symbol for the
 * dcomplex variant of omatcopy. Forwards all eight arguments unchanged to
 * zomatcopy_blis_impl().
 */
void zomatcopy_blis_impl_
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       const dcomplex* aptr,
       f77_int*        lda,
       dcomplex*       bptr,
       f77_int*        ldb
     )
{
    zomatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb );
}
/*
 * ZOMATCOPY_BLIS_IMPL_ -- upper-case, trailing-underscore symbol for the
 * dcomplex variant of omatcopy. Forwards all eight arguments unchanged to
 * zomatcopy_blis_impl().
 */
void ZOMATCOPY_BLIS_IMPL_
     (
       f77_char*       trans,
       f77_int*        rows,
       f77_int*        cols,
       const dcomplex* alpha,
       const dcomplex* aptr,
       f77_int*        lda,
       dcomplex*       bptr,
       f77_int*        ldb
     )
{
    zomatcopy_blis_impl( trans, rows, cols, alpha, aptr, lda, bptr, ldb );
}
float SCABS1_BLIS_IMPL(bla_scomplex* z)
{
return scabs1_blis_impl( z);

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -1671,6 +1671,133 @@ BLIS_EXPORT_BLIS void zgemmt_blis_impl_( const f77_char* uploc, const f77_char*
/* Exported prototype for the upper-case, trailing-underscore spelling of the
 * dcomplex gemmt blis_impl entry point. All scalar and dimension arguments
 * are passed by pointer; only c is writable. */
BLIS_EXPORT_BLIS void ZGEMMT_BLIS_IMPL_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc);
/*
 * scomplex matrix copy / add blis_impl prototypes (imatcopy, omatadd,
 * omatcopy2, omatcopy).
 *
 * Each routine is declared under four spellings: UPPER, lower, lower with a
 * trailing underscore, and UPPER with a trailing underscore. The
 * lower-case trailing-underscore prototypes were previously absent, which
 * left those symbols without a BLIS_EXPORT_BLIS declaration.
 * NOTE(review): assumes the wrapper source defines cimatcopy_blis_impl_,
 * comatadd_blis_impl_, comatcopy2_blis_impl_ and comatcopy_blis_impl_ with
 * these signatures, as it does for the s/z routines -- confirm.
 */
BLIS_EXPORT_BLIS void CIMATCOPY_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void cimatcopy_blis_impl(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void cimatcopy_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void CIMATCOPY_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, scomplex* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void COMATADD_BLIS_IMPL(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda, const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc);
BLIS_EXPORT_BLIS void comatadd_blis_impl(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda, const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc);
BLIS_EXPORT_BLIS void comatadd_blis_impl_(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda, const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc);
BLIS_EXPORT_BLIS void COMATADD_BLIS_IMPL_(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const scomplex* alpha, const scomplex* A, f77_int* lda, const scomplex* beta, scomplex* B, f77_int* ldb, scomplex* C, f77_int* ldc);
BLIS_EXPORT_BLIS void COMATCOPY2_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, f77_int* stridea, scomplex* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void comatcopy2_blis_impl(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, f77_int* stridea, scomplex* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void comatcopy2_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, f77_int* stridea, scomplex* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void COMATCOPY2_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, f77_int* stridea, scomplex* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void COMATCOPY_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void comatcopy_blis_impl(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void comatcopy_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void COMATCOPY_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha, const scomplex* aptr, f77_int* lda, scomplex* bptr, f77_int* ldb);
/*
 * double matrix copy / add blis_impl prototypes (imatcopy, omatadd,
 * omatcopy2, omatcopy).
 *
 * Each routine is declared under four spellings: UPPER, lower, lower with a
 * trailing underscore, and UPPER with a trailing underscore. The
 * lower-case trailing-underscore prototypes were previously absent, which
 * left those symbols without a BLIS_EXPORT_BLIS declaration.
 * NOTE(review): assumes the wrapper source defines dimatcopy_blis_impl_,
 * domatadd_blis_impl_, domatcopy2_blis_impl_ and domatcopy_blis_impl_ with
 * these signatures, as it does for the s/z routines -- confirm.
 */
BLIS_EXPORT_BLIS void DIMATCOPY_BLIS_IMPL( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void dimatcopy_blis_impl( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void dimatcopy_blis_impl_( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void DIMATCOPY_BLIS_IMPL_( f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, double* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void DOMATADD_BLIS_IMPL(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc);
BLIS_EXPORT_BLIS void domatadd_blis_impl(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc);
BLIS_EXPORT_BLIS void domatadd_blis_impl_(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc);
BLIS_EXPORT_BLIS void DOMATADD_BLIS_IMPL_(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const double* alpha, const double* A, f77_int* lda, const double* beta, const double* B, f77_int* ldb, double* C, f77_int* ldc);
BLIS_EXPORT_BLIS void DOMATCOPY2_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, f77_int* stridea, double* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void domatcopy2_blis_impl(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, f77_int* stridea, double* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void domatcopy2_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, f77_int* stridea, double* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void DOMATCOPY2_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, f77_int* stridea, double* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void DOMATCOPY_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void domatcopy_blis_impl(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void domatcopy_blis_impl_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void DOMATCOPY_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const double* alpha, const double* aptr, f77_int* lda, double* bptr, f77_int* ldb);
/*
 * float matrix copy / add blis_impl prototypes (imatcopy, omatadd,
 * omatcopy2, omatcopy).
 *
 * Each routine is declared under four spellings: UPPER, lower, lower with a
 * trailing underscore, and UPPER with a trailing underscore. The
 * lower-case trailing-underscore prototypes (simatcopy_blis_impl_,
 * somatadd_blis_impl_, somatcopy2_blis_impl_, somatcopy_blis_impl_) were
 * previously absent, which left those wrapper definitions without a
 * BLIS_EXPORT_BLIS declaration.
 */
BLIS_EXPORT_BLIS void SIMATCOPY_BLIS_IMPL( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void simatcopy_blis_impl( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void simatcopy_blis_impl_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void SIMATCOPY_BLIS_IMPL_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, float* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void SOMATADD_BLIS_IMPL( f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc);
BLIS_EXPORT_BLIS void somatadd_blis_impl( f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc);
BLIS_EXPORT_BLIS void somatadd_blis_impl_( f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc);
BLIS_EXPORT_BLIS void SOMATADD_BLIS_IMPL_( f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const float* alpha, const float* A, f77_int* lda, const float* beta, const float* B, f77_int* ldb, float* C, f77_int* ldc);
BLIS_EXPORT_BLIS void SOMATCOPY2_BLIS_IMPL( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, f77_int* stridea, float* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void somatcopy2_blis_impl( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, f77_int* stridea, float* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void somatcopy2_blis_impl_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, f77_int* stridea, float* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void SOMATCOPY2_BLIS_IMPL_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, f77_int* stridea, float* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void SOMATCOPY_BLIS_IMPL( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void somatcopy_blis_impl( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void somatcopy_blis_impl_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void SOMATCOPY_BLIS_IMPL_( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha, const float* aptr, f77_int* lda, float* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void ZIMATCOPY_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, dcomplex* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void zimatcopy_blis_impl(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, dcomplex* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void ZIMATCOPY_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, dcomplex* aptr, f77_int* lda, f77_int* ldb);
BLIS_EXPORT_BLIS void ZOMATADD_BLIS_IMPL(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda, const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc);
BLIS_EXPORT_BLIS void zomatadd_blis_impl(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda, const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc);
BLIS_EXPORT_BLIS void ZOMATADD_BLIS_IMPL_(f77_char* transa, f77_char* transb, f77_int* m, f77_int* n, const dcomplex* alpha, const dcomplex* A, f77_int* lda, const dcomplex* beta, dcomplex* B, f77_int* ldb, dcomplex* C, f77_int* ldc);
BLIS_EXPORT_BLIS void ZOMATCOPY2_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, f77_int* stridea, dcomplex* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void zomatcopy2_blis_impl(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, f77_int* stridea, dcomplex* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void ZOMATCOPY2_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, f77_int* stridea, dcomplex* bptr, f77_int* ldb, f77_int* strideb);
BLIS_EXPORT_BLIS void ZOMATCOPY_BLIS_IMPL(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void zomatcopy_blis_impl(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb);
BLIS_EXPORT_BLIS void ZOMATCOPY_BLIS_IMPL_(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha, const dcomplex* aptr, f77_int* lda, dcomplex* bptr, f77_int* ldb);
#endif
#endif