Files
blis/cpp/blis.hh
Chithra Sankar e7dbbb1194 Level 3 BLAS CPP routines(Except trmm) + Doxygen Commenting + Test application for dgemm CPP API
Change-Id: I97d1203ce466f3adb991341c17db9070c1eaf104
2019-08-19 10:29:58 +05:30

1177 lines
32 KiB
C++

/******************************************************************************
* Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*******************************************************************************/
/*! @file blis.hh
* blis.hh defines all the CPP templated public interfaces
* */
#ifndef BLIS_HH
#define BLIS_HH
#include "cblas.hh"
#include "blis_util.hh"
#include <limits>
namespace blis {
/*! @brief \b GEMM
\verbatim
GEMM performs general matrix-matrix multiply for arbitrary data types
Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL,
SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16)
C := alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X**T or op( X ) = X**H,
alpha and beta are scalars, and A, B and C are matrices, with op( A )
an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
\endverbatim
\param[in] layout
\verbatim
layout is enum CBLAS_ORDER
layout specifies Matrix storage as follows:
layout = CBLAS_ORDER::CblasRowMajor or CBLAS_ORDER::CblasColMajor.
\endverbatim
\param[in] transA
\verbatim
transA is CBLAS_TRANSPOSE
On entry, transA specifies the form of op( A ) to be used in
the matrix multiplication as follows:
transA = CBLAS_TRANSPOSE::CblasNoTrans, op( A ) = A.
transA = CBLAS_TRANSPOSE::CblasTrans, op( A ) = A**T.
transA = CBLAS_TRANSPOSE::CblasConjTrans, op( A ) = A**H.
\endverbatim
\param[in] transB
\verbatim
transB is CBLAS_TRANSPOSE
On entry, transB specifies the form of op( B ) to be used in
the matrix multiplication as follows:
transB = CBLAS_TRANSPOSE::CblasNoTrans, op( B ) = B.
transB = CBLAS_TRANSPOSE::CblasTrans, op( B ) = B**T.
transB = CBLAS_TRANSPOSE::CblasConjTrans, op( B ) = B**H.
\endverbatim
\param[in] m
\verbatim
m is INTEGER
On entry, m specifies the number of rows of the matrix
op( A ) and of the matrix C. m must be at least zero.
\endverbatim
\param[in] n
\verbatim
n is INTEGER
On entry, n specifies the number of columns of the matrix
op( B ) and the number of columns of the matrix C. n must be
at least zero.
\endverbatim
\param[in] k
\verbatim
k is INTEGER
On entry, k specifies the number of columns of the matrix
op( A ) and the number of rows of the matrix op( B ). k must
be at least zero.
\endverbatim
\param[in] alpha
\verbatim
alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, alpha specifies the scalar alpha.
\endverbatim
\param[in] A
\verbatim
A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If transA = CblasNoTrans:
m-by-k , stored in an lda-by-k array [RowMajor: m-by-lda].
Otherwise:
k-by-m , stored in an lda-by-m array [RowMajor: k-by-lda].
\endverbatim
\param[in] lda
\verbatim
lda is INTEGER
On entry, lda specifies the Leading dimension of A
If transA = CblasNoTrans: lda >= max(1, m) [RowMajor: lda >= max(1, k)].
Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, m)].
\endverbatim
\param[in] B
\verbatim
B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If transB = CblasNoTrans:
k-by-n , stored in an ldb-by-n array [RowMajor: k-by-ldb].
Otherwise:
n-by-k , stored in an ldb-by-k array [RowMajor: n-by-ldb].
\endverbatim
\param[in] ldb
\verbatim
ldb is INTEGER
On entry, ldb specifies the Leading dimension of B
If transB = CblasNoTrans: ldb >= max(1, k) [RowMajor: ldb >= max(1, n)].
Otherwise: ldb >= max(1, n) [RowMajor: ldb >= max(1, k)].
\endverbatim
\param[in] beta
\verbatim
beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, beta specifies the scalar beta. When beta is
supplied as zero then C need not be set on input.
\endverbatim
\param[in,out] C
\verbatim
C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension :
m-by-n stored in an ldc-by-n array [RowMajor: m-by-ldc].
Before entry, the leading m by n part of the array C must
contain the matrix C, except when beta is zero, in which
case C need not be set on entry.
On exit, the array C is overwritten by the m by n matrix
( alpha*op( A )*op( B ) + beta*C ).
\endverbatim
\param[in] ldc
\verbatim
ldc is INTEGER
On entry, ldc specifies the first dimension of C
ldc >= max(1, m) [RowMajor: ldc >= max(1, n)].
\endverbatim
*/
template< typename T >
void gemm(
    CBLAS_ORDER layout,
    CBLAS_TRANSPOSE transA,
    CBLAS_TRANSPOSE transB,
    int64_t m,
    int64_t n,
    int64_t k,
    T alpha,
    T const *A,
    int64_t lda,
    T const *B,
    int64_t ldb,
    T beta,
    T *C,
    int64_t ldc )
{
    // Pure forwarding wrapper: every argument is passed on unchanged and in
    // the same order to cblas_gemm (declared outside this header's view).
    cblas_gemm(
        layout, transA, transB,
        m, n, k,
        alpha,
        A, lda,
        B, ldb,
        beta,
        C, ldc );
}
/*! @brief \b TRSM
\verbatim
TRSM solves one of the matrix equations for arbitrary data types
Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL,
SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16)
op( A )*X = alpha*B, or X*op( A ) = alpha*B,
where alpha is a scalar, X and B are m by n matrices, A is a unit, or
non-unit, upper or lower triangular matrix and op( A ) is one of
op( A ) = A or op( A ) = A**T or op( A ) = A**H.
The matrix X is overwritten on B.
\endverbatim
\param[in] layout
\verbatim
layout is enum CBLAS_ORDER
layout specifies Matrix storage as follows:
layout = CBLAS_ORDER::CblasRowMajor or CBLAS_ORDER::CblasColMajor.
\endverbatim
\param[in] side
\verbatim
side is enum CBLAS_SIDE
side specifies whether op( A ) appears on the left
or right of X as follows:
side = CBLAS_SIDE::CblasLeft op( A )*X = alpha*B.
side = CBLAS_SIDE::CblasRight X*op( A ) = alpha*B.
\endverbatim
\param[in] uplo
\verbatim
uplo is enum CBLAS_UPLO
uplo specifies whether the matrix A is an upper or
lower triangular matrix as follows:
uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix.
uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix.
\endverbatim
\param[in] trans
\verbatim
trans is CBLAS_TRANSPOSE
On entry, trans specifies the form of op( A ) to be used in
the matrix multiplication as follows:
trans = CBLAS_TRANSPOSE::CblasNoTrans, op( A ) = A.
trans = CBLAS_TRANSPOSE::CblasTrans, op( A ) = A**T.
trans = CBLAS_TRANSPOSE::CblasConjTrans, op( A ) = A**H.
\endverbatim
\param[in] diag
\verbatim
diag is enum CBLAS_DIAG
diag specifies whether or not A is unit triangular
as follows:
diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular.
diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit
triangular.
\endverbatim
\param[in] m
\verbatim
m is INTEGER
On entry, m specifies the number of rows of the matrix
B. m must be at least zero.
\endverbatim
\param[in] n
\verbatim
n is INTEGER
On entry, n specifies the number of columns of the matrix
B. n must be at least zero.
\endverbatim
\param[in] alpha
\verbatim
alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, alpha specifies the scalar alpha.
\endverbatim
\param[in] A
\verbatim
A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If side = CblasLeft:
the m-by-m matrix A, stored in an lda-by-m array [RowMajor: m-by-lda].
If side = CblasRight:
the n-by-n matrix A, stored in an lda-by-n array [RowMajor: n-by-lda].
\endverbatim
\param[in] lda
\verbatim
lda is INTEGER
On entry, lda specifies the Leading dimension of A
If side = CblasLeft: lda >= max(1, m) .
If side = CblasRight:lda >= max(1, n) .
\endverbatim
\param[in,out] B
\verbatim
B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
m-by-n , stored in an ldb-by-n array [RowMajor: m-by-ldb].
on exit is overwritten by the solution matrix X.
\endverbatim
\param[in] ldb
\verbatim
ldb is INTEGER
On entry, ldb specifies the Leading dimension of B
ldb >= max(1, m) [RowMajor: ldb >= max(1, n)].
\endverbatim
*/
template< typename T >
void trsm(
    CBLAS_ORDER layout,
    CBLAS_SIDE side,
    CBLAS_UPLO uplo,
    CBLAS_TRANSPOSE trans,
    CBLAS_DIAG diag,
    int64_t m, int64_t n,
    T alpha,
    T const *A,
    int64_t lda,
    T *B,
    int64_t ldb )
{
    // Pure forwarding wrapper: arguments go to cblas_trsm untouched and in
    // declaration order. B is the only non-const array handed over.
    cblas_trsm(
        layout, side, uplo, trans, diag,
        m, n,
        alpha,
        A, lda,
        B, ldb );
}
/*! @brief \b HEMM
\verbatim
HEMM performs one of the matrix-matrix operations for arbitrary data types
Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL,
SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16)
C := alpha*A*B + beta*C
or
C := alpha*B*A + beta*C,
where alpha is a scalar, A is an hermitian matrix
C and B are m by n matrices
\endverbatim
\param[in] layout
\verbatim
layout is enum CBLAS_ORDER
layout specifies Matrix storage as follows:
layout = CBLAS_ORDER::CblasRowMajor or CBLAS_ORDER::CblasColMajor.
\endverbatim
\param[in] side
\verbatim
side is enum CBLAS_SIDE
side specifies whether the hermitian matrix A
appears on the left or right in the operation as follows:
side = CBLAS_SIDE::CblasLeft C := alpha*A*B + beta*C,
side = CBLAS_SIDE::CblasRight C := alpha*B*A + beta*C
\endverbatim
\param[in] uplo
\verbatim
uplo is enum CBLAS_UPLO
uplo specifies whether the upper or lower
triangular part of the hermitian matrix A is to be
referenced as follows:
uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of the
hermitian matrix is to be referenced.
uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of the
hermitian matrix is to be referenced.
\endverbatim
\param[in] m
\verbatim
m is INTEGER
On entry, m specifies the number of rows of the matrix
C. m must be at least zero.
\endverbatim
\param[in] n
\verbatim
n is INTEGER
On entry, n specifies the number of columns of the matrix
C. n must be at least zero.
\endverbatim
\param[in] alpha
\verbatim
alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, alpha specifies the scalar alpha.
\endverbatim
\param[in] A
\verbatim
A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If side = CblasLeft:
the m-by-m matrix A, stored in an lda-by-m array [RowMajor: m-by-lda].
If side = CblasRight:
the n-by-n matrix A, stored in an lda-by-n array [RowMajor: n-by-lda].
\endverbatim
\param[in] lda
\verbatim
lda is INTEGER
On entry, lda specifies the Leading dimension of A
If side = CblasLeft: lda >= max(1, m) .
If side = CblasRight:lda >= max(1, n) .
\endverbatim
\param[in] B
\verbatim
B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
m-by-n , stored in an ldb-by-n array [RowMajor: m-by-ldb].
\endverbatim
\param[in] ldb
\verbatim
ldb is INTEGER
On entry, ldb specifies the Leading dimension of B
ldb >= max(1, m) [RowMajor: ldb >= max(1, n)].
\endverbatim
\param[in] beta
\verbatim
beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, beta specifies the scalar beta.
If beta is zero, C need not be set on input
\endverbatim
\param[in,out] C
\verbatim
C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
m-by-n , stored in an ldc-by-n array [RowMajor: m-by-ldc].
\endverbatim
\param[in] ldc
\verbatim
ldc is INTEGER
On entry, ldc specifies the Leading dimension of C
ldc >= max(1, m) [RowMajor: ldc >= max(1, n)].
\endverbatim
*/
template< typename T >
void hemm(
    CBLAS_ORDER layout,
    CBLAS_SIDE side,
    CBLAS_UPLO uplo,
    int64_t m,
    int64_t n,
    T alpha,
    T const *A,
    int64_t lda,
    T const *B,
    int64_t ldb,
    T beta,
    T *C,
    int64_t ldc )
{
    // Pure forwarding wrapper: hands all arguments, unchanged and in the same
    // order, to cblas_hemm. C is the only non-const array handed over.
    cblas_hemm(
        layout, side, uplo,
        m, n,
        alpha,
        A, lda,
        B, ldb,
        beta,
        C, ldc );
}
/*! @brief \b SYMM
\verbatim
SYMM performs one of the matrix-matrix operations for arbitrary data types
Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL,
SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16)
C := alpha*A*B + beta*C
or
C := alpha*B*A + beta*C,
where alpha is a scalar, A is a symmetric matrix
C and B are m by n matrices
\endverbatim
\param[in] layout
\verbatim
layout is enum CBLAS_ORDER
layout specifies Matrix storage as follows:
layout = CBLAS_ORDER::CblasRowMajor or CBLAS_ORDER::CblasColMajor.
\endverbatim
\param[in] side
\verbatim
side is enum CBLAS_SIDE
side specifies whether the symmetric matrix A
appears on the left or right in the operation as follows:
side = CBLAS_SIDE::CblasLeft C := alpha*A*B + beta*C,
side = CBLAS_SIDE::CblasRight C := alpha*B*A + beta*C
\endverbatim
\param[in] uplo
\verbatim
uplo is enum CBLAS_UPLO
uplo specifies whether the upper or lower
triangular part of the symmetric matrix A is to be
referenced as follows:
uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of the
symmetric matrix is to be referenced.
uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of the
symmetric matrix is to be referenced.
\endverbatim
\param[in] m
\verbatim
m is INTEGER
On entry, m specifies the number of rows of the matrix
C. m must be at least zero.
\endverbatim
\param[in] n
\verbatim
n is INTEGER
On entry, n specifies the number of columns of the matrix
C. n must be at least zero.
\endverbatim
\param[in] alpha
\verbatim
alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, alpha specifies the scalar alpha.
\endverbatim
\param[in] A
\verbatim
A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If side = CblasLeft:
the m-by-m matrix A, stored in an lda-by-m array [RowMajor: m-by-lda].
If side = CblasRight:
the n-by-n matrix A, stored in an lda-by-n array [RowMajor: n-by-lda].
\endverbatim
\param[in] lda
\verbatim
lda is INTEGER
On entry, lda specifies the Leading dimension of A
If side = CblasLeft: lda >= max(1, m) .
If side = CblasRight:lda >= max(1, n) .
\endverbatim
\param[in] B
\verbatim
B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
m-by-n , stored in an ldb-by-n array [RowMajor: m-by-ldb].
\endverbatim
\param[in] ldb
\verbatim
ldb is INTEGER
On entry, ldb specifies the Leading dimension of B
ldb >= max(1, m) [RowMajor: ldb >= max(1, n)].
\endverbatim
\param[in] beta
\verbatim
beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, beta specifies the scalar beta.
If beta is zero, C need not be set on input
\endverbatim
\param[in,out] C
\verbatim
C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
m-by-n , stored in an ldc-by-n array [RowMajor: m-by-ldc].
\endverbatim
\param[in] ldc
\verbatim
ldc is INTEGER
On entry, ldc specifies the Leading dimension of C
ldc >= max(1, m) [RowMajor: ldc >= max(1, n)].
\endverbatim
*/
template< typename T >
void symm(
    CBLAS_ORDER layout,
    CBLAS_SIDE side,
    CBLAS_UPLO uplo,
    int64_t m,
    int64_t n,
    T alpha,
    T const *A,
    int64_t lda,
    T const *B,
    int64_t ldb,
    T beta,
    T *C,
    int64_t ldc )
{
    // Pure forwarding wrapper: hands all arguments, unchanged and in the same
    // order, to cblas_symm. C is the only non-const array handed over.
    cblas_symm(
        layout, side, uplo,
        m, n,
        alpha,
        A, lda,
        B, ldb,
        beta,
        C, ldc );
}
/*! @brief \b SYRK
\verbatim
SYRK performs one of the symmetric rank k operations for arbitrary data types
Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL,
SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16)
C := alpha*A*A**T + beta*C,
or
C := alpha*A**T*A + beta*C,
where alpha and beta are scalars, C is an n by n symmetric matrix
and A is an n by k matrix in the first case and a k by n matrix
in the second case.
\endverbatim
\param[in] layout
\verbatim
layout is enum CBLAS_ORDER
layout specifies Matrix storage as follows:
layout = CBLAS_ORDER::CblasRowMajor or CBLAS_ORDER::CblasColMajor.
\endverbatim
\param[in] uplo
\verbatim
uplo is enum CBLAS_UPLO
uplo specifies whether the upper or lower
triangular part of the array C is to be referenced
as follows:
uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of C
is to be referenced.
uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of C
is to be referenced.
\endverbatim
\param[in] trans
\verbatim
trans is CBLAS_TRANSPOSE
On entry, trans specifies the operation to be used as follows:
trans = CBLAS_TRANSPOSE::CblasNoTrans,C := alpha*A*A**T + beta*C.
trans = CBLAS_TRANSPOSE::CblasTrans,C := alpha*A**T*A + beta*C.
\endverbatim
\param[in] n
\verbatim
n is INTEGER
On entry, n specifies the order of the matrix C. n must be
at least zero.
\endverbatim
\param[in] k
\verbatim
k is INTEGER
If trans = CblasNoTrans: k is number of columns of the matrix A.
Otherwise: k is number of rows of the matrix A.
k must be at least zero.
\endverbatim
\param[in] alpha
\verbatim
alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, alpha specifies the scalar alpha.
\endverbatim
\param[in] A
\verbatim
A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If trans = CblasNoTrans:
n-by-k , stored in an lda-by-k array [RowMajor: n-by-lda].
Otherwise:
k-by-n , stored in an lda-by-n array [RowMajor: k-by-lda].
\endverbatim
\param[in] lda
\verbatim
lda is INTEGER
On entry, lda specifies the Leading dimension of A
If trans = CblasNoTrans: lda >= max(1, n) [RowMajor: lda >= max(1, k)].
Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, n)].
\endverbatim
\param[in] beta
\verbatim
beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, beta specifies the scalar beta. When beta is
supplied as zero then C need not be set on input.
\endverbatim
\param[in,out] C
\verbatim
C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension :
The n-by-n symmetric matrix C,
stored in an ldc-by-n array [RowMajor: n-by-ldc].
On exit, the array C is overwritten by the lower/upper
triangular part of the updated matrix.
\endverbatim
\param[in] ldc
\verbatim
ldc is INTEGER
On entry, ldc specifies the first dimension of C
ldc >= max(1, n)
\endverbatim
*/
template< typename T >
void syrk(
    CBLAS_ORDER layout,
    CBLAS_UPLO uplo,
    CBLAS_TRANSPOSE trans,
    int64_t n,
    int64_t k,
    T alpha,
    T const *A,
    int64_t lda,
    T beta,
    T *C,
    int64_t ldc )
{
    // Pure forwarding wrapper: arguments are relayed to cblas_syrk unchanged
    // and in declaration order. C is the only non-const array handed over.
    cblas_syrk(
        layout, uplo, trans,
        n, k,
        alpha,
        A, lda,
        beta,
        C, ldc );
}
/*! @brief \b SYR2K
\verbatim
SYR2K performs one of the symmetric rank 2k operations for arbitrary data types
Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL,
SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16)
C := alpha*A*B**T + alpha*B*A**T + beta*C,
or
C := alpha*A**T*B + alpha*B**T*A + beta*C,
where alpha and beta are scalars, C is an n by n symmetric matrix
and A and B are n by k matrices in the first case and k by n matrices
in the second case.
\endverbatim
\param[in] layout
\verbatim
layout is enum CBLAS_ORDER
layout specifies Matrix storage as follows:
layout = CBLAS_ORDER::CblasRowMajor or CBLAS_ORDER::CblasColMajor.
\endverbatim
\param[in] uplo
\verbatim
uplo is enum CBLAS_UPLO
uplo specifies whether the upper or lower
triangular part of the array C is to be referenced
as follows:
uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of C
is to be referenced.
uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of C
is to be referenced.
\endverbatim
\param[in] trans
\verbatim
trans is CBLAS_TRANSPOSE
On entry, trans specifies the operation to be used as follows:
trans = CBLAS_TRANSPOSE::CblasNoTrans,C := alpha*A*B**T + alpha*B*A**T
+ beta*C.
trans = CBLAS_TRANSPOSE::CblasTrans, C := alpha*A**T*B + alpha*B**T*A
+ beta*C.
\endverbatim
\param[in] n
\verbatim
n is INTEGER
On entry, n specifies the order of the matrix C. n must be
at least zero.
\endverbatim
\param[in] k
\verbatim
k is INTEGER
If trans = CblasNoTrans: k is number of columns of the matrices A & B.
Otherwise: k is number of rows of the matrices A & B.
k must be at least zero.
\endverbatim
\param[in] alpha
\verbatim
alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, alpha specifies the scalar alpha.
\endverbatim
\param[in] A
\verbatim
A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If trans = CblasNoTrans:
n-by-k , stored in an lda-by-k array [RowMajor: n-by-lda].
Otherwise:
k-by-n , stored in an lda-by-n array [RowMajor: k-by-lda].
\endverbatim
\param[in] lda
\verbatim
lda is INTEGER
On entry, lda specifies the Leading dimension of A
If trans = CblasNoTrans: lda >= max(1, n) [RowMajor: lda >= max(1, k)].
Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, n)].
\endverbatim
\param[in] B
\verbatim
B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If trans = CblasNoTrans:
n-by-k , stored in an ldb-by-k array [RowMajor: n-by-ldb].
Otherwise:
k-by-n , stored in an ldb-by-n array [RowMajor: k-by-ldb]
\endverbatim
\param[in] ldb
\verbatim
ldb is INTEGER
On entry, ldb specifies the Leading dimension of B
If trans = CblasNoTrans: ldb >= max(1, n) [RowMajor: ldb >= max(1, k)].
Otherwise: ldb >= max(1, k) [RowMajor: ldb >= max(1, n)].
\endverbatim
\param[in] beta
\verbatim
beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, beta specifies the scalar beta. When beta is
supplied as zero then C need not be set on input.
\endverbatim
\param[in,out] C
\verbatim
C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension :
The n-by-n symmetric matrix C,
stored in an ldc-by-n array [RowMajor: n-by-ldc].
On exit, the array C is overwritten by the lower/upper
triangular part of the updated matrix.
\endverbatim
\param[in] ldc
\verbatim
ldc is INTEGER
On entry, ldc specifies the first dimension of C
ldc >= max(1, n)
\endverbatim
*/
template< typename T >
void syr2k(
    CBLAS_ORDER layout,
    CBLAS_UPLO uplo,
    CBLAS_TRANSPOSE trans,
    int64_t n,
    int64_t k,
    T alpha,
    T const *A,
    int64_t lda,
    T const *B,
    int64_t ldb,
    T beta,
    T *C,
    int64_t ldc )
{
    // Pure forwarding wrapper: arguments are relayed to cblas_syr2k unchanged
    // and in declaration order. C is the only non-const array handed over.
    cblas_syr2k(
        layout, uplo, trans,
        n, k,
        alpha,
        A, lda,
        B, ldb,
        beta,
        C, ldc );
}
/*! @brief \b HERK
\verbatim
HERK performs one of the hermitian rank k operations for arbitrary data types
Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL,
SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16)
C := alpha*A*A**H + beta*C,
or
C := alpha*A**H*A + beta*C,
where alpha and beta are real scalars, C is an n by n hermitian
matrix and A is an n by k matrix in the first case and
k by n matrix in the second case.
\endverbatim
\param[in] layout
\verbatim
layout is enum CBLAS_ORDER
layout specifies Matrix storage as follows:
layout = CBLAS_ORDER::CblasRowMajor or CBLAS_ORDER::CblasColMajor.
\endverbatim
\param[in] uplo
\verbatim
uplo is enum CBLAS_UPLO
uplo specifies whether the upper or lower
triangular part of the array C is to be referenced
as follows:
uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of C
is to be referenced.
uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of C
is to be referenced.
\endverbatim
\param[in] trans
\verbatim
trans is CBLAS_TRANSPOSE
On entry, trans specifies the operation to be used as follows:
trans = CBLAS_TRANSPOSE::CblasNoTrans, C := alpha*A*A**H + beta*C.
trans = CBLAS_TRANSPOSE::CblasConjTrans,C := alpha*A**H*A + beta*C.
\endverbatim
\param[in] n
\verbatim
n is INTEGER
On entry, n specifies the order of the matrix C. n must be
at least zero.
\endverbatim
\param[in] k
\verbatim
k is INTEGER
If trans = CblasNoTrans: k is number of columns of the matrix A.
Otherwise: k is number of rows of the matrix A.
k must be at least zero.
\endverbatim
\param[in] alpha
\verbatim
alpha is REAL/DOUBLE PRECISION
On entry, alpha specifies the scalar alpha.
\endverbatim
\param[in] A
\verbatim
A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If trans = CblasNoTrans:
n-by-k , stored in an lda-by-k array [RowMajor: n-by-lda].
Otherwise:
k-by-n , stored in an lda-by-n array [RowMajor: k-by-lda].
\endverbatim
\param[in] lda
\verbatim
lda is INTEGER
On entry, lda specifies the Leading dimension of A
If trans = CblasNoTrans: lda >= max(1, n) [RowMajor: lda >= max(1, k)].
Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, n)].
\endverbatim
\param[in] beta
\verbatim
beta is REAL/DOUBLE PRECISION
On entry, beta specifies the scalar beta. When beta is
supplied as zero then C need not be set on input.
\endverbatim
\param[in,out] C
\verbatim
C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension :
The n-by-n Hermitian matrix C,
stored in an ldc-by-n array [RowMajor: n-by-ldc].
On exit, the array C is overwritten by the lower/upper
triangular part of the updated matrix.
\endverbatim
\param[in] ldc
\verbatim
ldc is INTEGER
On entry, ldc specifies the first dimension of C
ldc >= max(1, n)
\endverbatim
*/
template< typename T >
void herk(
    CBLAS_ORDER layout,
    CBLAS_UPLO uplo,
    CBLAS_TRANSPOSE trans,
    int64_t n,
    int64_t k,
    real_type<T> alpha,
    T const *A,
    int64_t lda,
    real_type<T> beta,
    T *C,
    int64_t ldc )
{
    // Pure forwarding wrapper: arguments are relayed to cblas_herk unchanged
    // and in declaration order. Both scalars are typed real_type<T>, not T.
    cblas_herk(
        layout, uplo, trans,
        n, k,
        alpha,
        A, lda,
        beta,
        C, ldc );
}
/*! @brief \b HER2K
\verbatim
HER2K performs one of the hermitian rank 2k operations for arbitrary data types
Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL,
SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16)
C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,
or
C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,
where alpha and beta are scalars with beta real, C is an n by n
hermitian matrix and A and B are n by k matrices in the first case
and k by n matrices in the second case.
\endverbatim
\param[in] layout
\verbatim
layout is enum CBLAS_ORDER
layout specifies Matrix storage as follows:
layout = CBLAS_ORDER::CblasRowMajor or CBLAS_ORDER::CblasColMajor.
\endverbatim
\param[in] uplo
\verbatim
uplo is enum CBLAS_UPLO
uplo specifies whether the upper or lower
triangular part of the array C is to be referenced
as follows:
uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of C
is to be referenced.
uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of C
is to be referenced.
\endverbatim
\param[in] trans
\verbatim
trans is CBLAS_TRANSPOSE
On entry, trans specifies the operation to be used as follows:
trans = CBLAS_TRANSPOSE::CblasNoTrans, C := alpha*A*B**H +
conjg( alpha )*B*A**H +
beta*C.
trans = CBLAS_TRANSPOSE::CblasConjTrans,C := alpha*A**H*B +
conjg( alpha )*B**H*A +
beta*C.
\endverbatim
\param[in] n
\verbatim
n is INTEGER
On entry, n specifies the order of the matrix C. n must be
at least zero.
\endverbatim
\param[in] k
\verbatim
k is INTEGER
If trans = CblasNoTrans: k is number of columns of the matrices A & B.
Otherwise: k is number of rows of the matrices A & B.
k must be at least zero.
\endverbatim
\param[in] alpha
\verbatim
alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16
On entry, alpha specifies the scalar alpha.
\endverbatim
\param[in] A
\verbatim
A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If trans = CblasNoTrans:
n-by-k , stored in an lda-by-k array [RowMajor: n-by-lda].
Otherwise:
k-by-n , stored in an lda-by-n array [RowMajor: k-by-lda].
\endverbatim
\param[in] lda
\verbatim
lda is INTEGER
On entry, lda specifies the Leading dimension of A
If trans = CblasNoTrans: lda >= max(1, n) [RowMajor: lda >= max(1, k)].
Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, n)].
\endverbatim
\param[in] B
\verbatim
B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension :
If trans = CblasNoTrans:
n-by-k , stored in an ldb-by-k array [RowMajor: n-by-ldb].
Otherwise:
k-by-n , stored in an ldb-by-n array [RowMajor: k-by-ldb]
\endverbatim
\param[in] ldb
\verbatim
ldb is INTEGER
On entry, ldb specifies the Leading dimension of B
If trans = CblasNoTrans: ldb >= max(1, n) [RowMajor: ldb >= max(1, k)].
Otherwise: ldb >= max(1, k) [RowMajor: ldb >= max(1, n)].
\endverbatim
\param[in] beta
\verbatim
beta is REAL/DOUBLE PRECISION
On entry, beta specifies the scalar beta. When beta is
supplied as zero then C need not be set on input.
\endverbatim
\param[in,out] C
\verbatim
C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension :
The n-by-n Hermitian matrix C,
stored in an ldc-by-n array [RowMajor: n-by-ldc].
On exit, the array C is overwritten by the lower/upper
triangular part of the updated matrix.
\endverbatim
\param[in] ldc
\verbatim
ldc is INTEGER
On entry, ldc specifies the first dimension of C
ldc >= max(1, n)
\endverbatim
*/
template< typename T >
void her2k(
    CBLAS_ORDER layout,
    CBLAS_UPLO uplo,
    CBLAS_TRANSPOSE trans,
    int64_t n,
    int64_t k,
    T alpha,
    T const *A,
    int64_t lda,
    T const *B,
    int64_t ldb,
    real_type<T> beta,
    T *C,
    int64_t ldc )
{
    // Pure forwarding wrapper: arguments are relayed to cblas_her2k unchanged
    // and in declaration order. alpha is typed T while beta is real_type<T>.
    cblas_her2k(
        layout, uplo, trans,
        n, k,
        alpha,
        A, lda,
        B, ldb,
        beta,
        C, ldc );
}
} // namespace blis
#endif // #ifndef BLIS_HH