diff --git a/cpp/blis.hh b/cpp/blis.hh index fc25ebff7..602c094fd 100644 --- a/cpp/blis.hh +++ b/cpp/blis.hh @@ -1,102 +1,3824 @@ +/****************************************************************************** +* Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +*******************************************************************************/ + +/*! @file blis.hh + * blis.hh defines all the BLAS CPP templated public interfaces + * */ #ifndef BLIS_HH #define BLIS_HH -#include "blis_util.hh" #include "cblas.hh" -#include +#include "blis_util.hh" namespace blis { -// ============================================================================= -/// General matrix-matrix multiply, -/// \f[ C = \alpha op(A) \times op(B) + \beta C, \f] -/// where op(X) is one of -/// \f[ op(X) = X, \f] -/// \f[ op(X) = X^T, \f] -/// \f[ op(X) = X^H, \f] -/// alpha and beta are scalars, and A, B, and C are matrices, with -/// op(A) an m-by-k matrix, op(B) a k-by-n matrix, and C an m-by-n matrix. -/// -/// Generic implementation for arbitrary data types. -/// TODO: generic version not yet implemented. -/// -/// @param[in] layout -/// Matrix storage, Layout::ColMajor or Layout::RowMajor. -/// -/// @param[in] transA -/// The operation op(A) to be used: -/// - Op::NoTrans: \f$ op(A) = A. \f$ -/// - Op::Trans: \f$ op(A) = A^T. \f$ -/// - Op::ConjTrans: \f$ op(A) = A^H. \f$ -/// -/// @param[in] transB -/// The operation op(B) to be used: -/// - Op::NoTrans: \f$ op(B) = B. \f$ -/// - Op::Trans: \f$ op(B) = B^T. \f$ -/// - Op::ConjTrans: \f$ op(B) = B^H. \f$ -/// -/// @param[in] m -/// Number of rows of the matrix C and op(A). m >= 0. -/// -/// @param[in] n -/// Number of columns of the matrix C and op(B). n >= 0. -/// -/// @param[in] k -/// Number of columns of op(A) and rows of op(B). k >= 0. -/// -/// @param[in] alpha -/// Scalar alpha. If alpha is zero, A and B are not accessed. -/// -/// @param[in] A -/// - If transA = NoTrans: -/// the m-by-k matrix A, stored in an lda-by-k array [RowMajor: m-by-lda]. -/// - Otherwise: -/// the k-by-m matrix A, stored in an lda-by-m array [RowMajor: k-by-lda]. -/// -/// @param[in] lda -/// Leading dimension of A. -/// - If transA = NoTrans: lda >= max(1, m) [RowMajor: lda >= max(1, k)]. -/// - Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, m)]. -/// -/// @param[in] B -/// - If transB = NoTrans: -/// the k-by-n matrix B, stored in an ldb-by-n array [RowMajor: k-by-ldb]. -/// - Otherwise: -/// the n-by-k matrix B, stored in an ldb-by-k array [RowMajor: n-by-ldb]. -/// -/// @param[in] ldb -/// Leading dimension of B. -/// - If transB = NoTrans: ldb >= max(1, k) [RowMajor: ldb >= max(1, n)]. -/// - Otherwise: ldb >= max(1, n) [RowMajor: ldb >= max(1, k)]. -/// -/// @param[in] beta -/// Scalar beta. If beta is zero, C need not be set on input. -/// -/// @param[in] C -/// The m-by-n matrix C, stored in an ldc-by-n array [RowMajor: m-by-ldc]. -/// -/// @param[in] ldc -/// Leading dimension of C. ldc >= max(1, m) [RowMajor: ldc >= max(1, n)]. -/// -/// @ingroup gemm +/*! \brief Construct plane rotation for arbitrary data types -template< typename TA, typename TB, typename TC > + \b Purpose: + + ROTG construct plane rotation that eliminates b for arbitrary data types, such that \n + + [ z ] = [ c s ] [ a ] \n + [ 0 ] [ -s c ] [ b ] \n + Data precisions supported include SINGLE/DOUBLE PRECISION REAL + + \param[in, out] a + SINGLE/DOUBLE PRECISION REAL + On entry, scalar a. On exit, set to z. + + \param[in, out] b + SINGLE/DOUBLE PRECISION REAL + On entry, scalar b. On exit, set to s, 1/c, or 0. + + \param[out] c + Cosine of rotation; SINGLE/DOUBLE PRECISION REAL. + + \param[out] s + Sine of rotation; SINGLE/DOUBLE PRECISION REAL. + */ +template< typename T > +void rotg( + T *a, + T *b, + T *c, + T *s ) +{ + cblas_rotg(a, b, c, s); +} + +/*! \brief Construct the modified givens transformation matrix for arbitrary data types + + \b Purpose: + + ROTMG construct modified (fast) plane rotation, H, that eliminates b, such that \n + [ z ] = H [ sqrt(d1) 0 ] [ a ] \n + [ 0 ] [ 0 sqrt(d2) ] [ b ] \n + Data precisions supported include SINGLE/DOUBLE PRECISION REAL + + \param[in, out] d1 + SINGLE/DOUBLE PRECISION REAL + sqrt(d1) is scaling factor for vector x. + + \param[in, out] d2 + SINGLE/DOUBLE PRECISION REAL + sqrt(d2) is scaling factor for vector y. + + \param[in, out] a + On entry, scalar a. On exit, set to z. SINGLE/DOUBLE PRECISION REAL. + + \param[in, out] b + On entry, scalar b. SINGLE/DOUBLE PRECISION REAL. + + \param[out] param + SINGLE/DOUBLE PRECISION REAL array, dimension (5),giving parameters + of modified plane rotation + param(1)=DFLAG + param(2)=DH11 + param(3)=DH21 + param(4)=DH12 + param(5)=DH22 + */ +template< typename T > +void rotmg( + T *d1, + T *d2, + T *a, + T b, + T param[5] ) +{ + cblas_rotmg(d1, d2, a, b, param ); +} + +/*! \brief Apply plane rotation for arbitrary data types + + \b Purpose: + + ROT applies a plane rotation: \n + [ x^T ] [ c s ] [ x^T ] \n + [ y^T ] = [ -s c ] [ y^T ] \n + Data precisions supported include SINGLE/DOUBLE PRECISION REAL + + \param[in] n + Number of elements in x and y. n >= 0. + + \param[in, out] x + SINGLE/DOUBLE PRECISION REAL array + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + + \param[in, out] y + SINGLE/DOUBLE PRECISION REAL array + The n-element vector y, in an array of length (n-1)*abs(incy) + 1. + + \param[in] incy + incy is INTEGER + Stride between elements of y. incy must not be zero. + If incy < 0, uses elements of y in reverse order: y(n-1), ..., y(0). + + \param[in] c + Cosine of rotation; SINGLE/DOUBLE PRECISION REAL. + + \param[in] s + Sine of rotation; SINGLE/DOUBLE PRECISION REAL. + */ +template< typename T > +void rot( + int64_t n, + T *x, int64_t incx, + T *y, int64_t incy, + T c, + T s ) +{ + cblas_rot( n, x, incx, y, incy, c, s ); +} + +/*! \brief Apply the modified givens transformation for arbitrary data types + + \b Purpose: + + ROTM applies modified (fast) plane rotation, H: \n + [ x^T ] = H [ x^T ] \n + [ y^T ] [ y^T ] \n + + Data precisions supported include SINGLE/DOUBLE PRECISION REAL + + \param[in] n + Number of elements in x and y. n >= 0. + + \param[in, out] x + SINGLE/DOUBLE PRECISION REAL array + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + + \param[in, out] y + SINGLE/DOUBLE PRECISION REAL array + The n-element vector y, in an array of length (n-1)*abs(incy) + 1. + + \param[in] incy + incy is INTEGER + Stride between elements of y. incy must not be zero. + If incy < 0, uses elements of y in reverse order: y(n-1), ..., y(0). + + \param[in] P + SINGLE/DOUBLE PRECISION REAL array, dimension (5),giving parameters + of modified plane rotation + param(1)=DFLAG + param(2)=DH11 + param(3)=DH21 + param(4)=DH12 + param(5)=DH22 + */ +template< typename T > +void rotm( + int64_t n, + T *x, int64_t incx, + T *y, int64_t incy, + const T *P) +{ + cblas_rotm( n, x, incx, y, incy, P ); +} + +/*! \brief Interchanges two vectors of arbitrary data types + + \b Purpose: + + SWAP interchanges two vectors uses unrolled loops for increments equal to 1.\n + x <=> y \n + Data precisions supported include SINGLE/DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + \param[in] n + n is INTEGER + Number of elements in x and y. n >= 0. + + \param[in] x + REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array. + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER. + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + + \param[in, out] y + REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array. + The n-element vector y, in an array of length (n-1)*abs(incy) + 1. + + \param[in] incy + incy is INTEGER. + Stride between elements of y. incy must not be zero. + If incy < 0, uses elements of y in reverse order: y(n-1), ..., y(0). + */ +template< typename T > +void swap( + int64_t n, + T *x, int64_t incx, + T *y, int64_t incy ) +{ + cblas_swap( n, x, incx, y, incy ); +} + +/*! \brief Scales a vector of arbitrary data types by a constant. + + \b Purpose: + + SCAL scales a vector by a constant, uses unrolled loops for increment equal to 1.\n + x = alpha * x \n + Data precisions of vector & constant include SINGLE/DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + \param[in] n + n is INTEGER + Number of elements in x. n >= 0. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in ,out] x + REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + */ +template< typename TA, typename TB > +void scal( + int64_t n, + TA alpha, + TB* x, int64_t incx ) +{ + cblas_scal( n, alpha, x, incx ); +} + +/*! \brief Copies a vector x to a vector y for arbitrary data types + + \b Purpose: + + COPY copies a vector x to a vector y.\n + y = x \n + Data precisions supported include SINGLE/DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + \param[in] n + n is INTEGER + Number of elements in x and y. n >= 0. + + \param[in] x + REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array. + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER. + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + + \param[out] y + REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array. + The n-element vector y, in an array of length (n-1)*abs(incy) + 1. + + \param[in] incy + incy is INTEGER. + Stride between elements of y. incy must not be zero. + If incy < 0, uses elements of y in reverse order: y(n-1), ..., y(0). + */ +template< typename T > +void copy( + int64_t n, + T const *x, int64_t incx, + T *y, int64_t incy ) +{ + cblas_copy( n, x, incx, y, incy ); +} + +/*! \brief Performs addition of scaled vector for arbitrary data types + + \b Purpose: + + AXPY constant times a vector plus a vector.\n + y = alpha*x + y \n + Data precisions supported include SINGLE/DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + \param[in] n + n is INTEGER + Number of elements in x and y. n >= 0. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha.\n + If alpha is zero, y is not updated. + + \param[in] x + REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array. + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER. + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + + \param[out] y + REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array. + The n-element vector y, in an array of length (n-1)*abs(incy) + 1. + + \param[in] incy + incy is INTEGER. + Stride between elements of y. incy must not be zero. + If incy < 0, uses elements of y in reverse order: y(n-1), ..., y(0). + */ +template< typename T > +void axpy( + int64_t n, + T alpha, + T const *x, int64_t incx, + T *y, int64_t incy ) +{ + cblas_axpy( n, alpha, x, incx, y, incy ); +} + +/*! \brief Performs the dot product of two vectors for arbitrary data types + + \b Purpose: + + DOT forms the dot product of two vectors + uses unrolled loops for increments equal to one.\n + dot = x^T * y \n + Data precisions supported include SINGLE/DOUBLE PRECISION REAL + + \param[in] n + n is INTEGER + Number of elements in x and y. n >= 0. + + \param[in] x + REAL/DOUBLE PRECISION array. + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER. + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + + \param[in] y + REAL/DOUBLE PRECISION array. + The n-element vector y, in an array of length (n-1)*abs(incy) + 1. + + \param[in] incy + incy is INTEGER. + Stride between elements of y. incy must not be zero. + If incy < 0, uses elements of y in reverse order: y(n-1), ..., y(0). + + \return Unconjugated dot product, x^T * y. + REAL/DOUBLE PRECISION + */ +template< typename T, typename TR > +TR dot( + int64_t n, + T const *x, int64_t incx, + T const *y, int64_t incy ) +{ + if((std::is_same::value)&(std::is_same::value)) + return cblas_dsdot( n, x, incx, y, incy ); + else + return cblas_dot( n, x, incx, y, incy ); +} + +/*! \brief Performs the dot product of two complex vectors + + \b Purpose: + + DOTU forms the dot product of two complex vectors. \n + CDOTU = X^T * Y \n + Data precisions supported include SINGLE/DOUBLE PRECISION COMPLEX + + \param[in] n + n is INTEGER + Number of elements in x and y. n >= 0. + + \param[in] x + REAL/DOUBLE PRECISION COMPLEX array. + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER. + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + + \param[in] y + REAL/DOUBLE PRECISION COMPLEX array. + The n-element vector y, in an array of length (n-1)*abs(incy) + 1. + + \param[in] incy + incy is INTEGER. + Stride between elements of y. incy must not be zero. + If incy < 0, uses elements of y in reverse order: y(n-1), ..., y(0). + + \return Unconjugated dot product, x^T * y. + REAL/DOUBLE PRECISION COMPLEX + */ +template< typename T > +T dotu( + int64_t n, + T const *x, int64_t incx, + T const *y, int64_t incy ) +{ + return cblas_dotu( n, x, incx, y, incy ); +} + +/*! \brief Performs the dot product of two complex vectors + + \b Purpose: + + DOTC forms the dot product of two complex vectors. \n + CDOTU = X^H * Y \n + Data precisions supported include SINGLE/DOUBLE PRECISION COMPLEX + + \param[in] n + n is INTEGER + Number of elements in x and y. n >= 0. + + \param[in] x + REAL/DOUBLE PRECISION COMPLEX array. + The n-element vector x, in an array of length (n-1)*abs(incx) + 1. + + \param[in] incx + incx is INTEGER. + Stride between elements of x. incx must not be zero. + If incx < 0, uses elements of x in reverse order: x(n-1), ..., x(0). + + \param[in] y + REAL/DOUBLE PRECISION COMPLEX array. + The n-element vector y, in an array of length (n-1)*abs(incy) + 1. + + \param[in] incy + incy is INTEGER. + Stride between elements of y. incy must not be zero. + If incy < 0, uses elements of y in reverse order: y(n-1), ..., y(0). + + \return Conjugated dot product, x^H * y. + REAL/DOUBLE PRECISION COMPLEX + */ +template< typename T > +T dotc( + int64_t n, + T const *x, int64_t incx, + T const *y, int64_t incy ) +{ + return cblas_dotc( n, x, incx, y, incy ); +} + +/*! \brief Performs inner product of two vectors with extended precision accumulation + + \b Purpose: + + DOTC forms the inner product of two vectors with extended precision accumulation. \n + Data precisions supported include SINGLE PRECISION REAL + + \param[in] n + n is INTEGER\n + number of elements in input vector(s) + + \param[in] alpha + alpha is REAL\n + single precision scalar to be added to inner product + + \param[in] x + x is REAL array, dimension ( 1 + ( n - 1 )*abs( incx ) )\n + single precision vector with n elements + + \param[in] incx + incx is INTEGER\n + storage spacing between elements of x + + \param[in] y + y is REAL array, dimension ( 1 + ( n - 1 )*abs( incx ) )\n + single precision vector with n elements + + \param[in] incy + incy is INTEGER\n + storage spacing between elements of y + + \return S.P. result with dot product accumulated in D.P. + */ +template< typename T > +T sdsdot( + int64_t n, + T alpha, + T const *x, int64_t incx, + T const *y, int64_t incy ) +{ + return cblas_sdsdot( n, alpha, x, incx, y, incy ); +} + +/*! \brief return 2-norm of vectors of arbitrary data types + + \b Purpose: + + NRM2 returns the euclidean norm of a vector via the function name, so that + SNRM2 := sqrt( x'*x ). \n + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + \param[in] n + n is INTEGER\n + number of elements in input vector(s) + + \param[in] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, + dimension ( 1 + ( n - 1 )*abs( incx ) )\n + single precision vector with n elements + + \param[in] incx + incx is INTEGER\n + storage spacing between elements of x + + \return 2-norm of vector + REAL SINGLE/DOUBLE PRECISION + */ +template< typename T > +real_type +nrm2( + int64_t n, + T const * x, int64_t incx ) +{ + return cblas_nrm2( n, x, incx ); +} + +/*! \brief return 1-norm of vector of arbitrary data types + + \b Purpose: + + ASUM takes the sum of the absolute values, uses unrolled loops for + increment equal to one. \n + ASUM := || Re(x) ||_1 + || Im(x) ||_1. \n + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + \param[in] n + n is INTEGER\n + number of elements in input vector(s) + + \param[in] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, + dimension ( 1 + ( n - 1 )*abs( incx ) )\n + single precision vector with n elements + + \param[in] incx + incx is INTEGER\n + storage spacing between elements of x + + \return 1-norm of vector + REAL SINGLE/DOUBLE PRECISION + */ +template< typename T > +real_type +asum( + int64_t n, + T const *x, int64_t incx ) +{ + return cblas_asum( n, x, incx ); +} + +/*! \brief Return Index of infinity-norm of vectors of arbitrary types. + + \b Purpose: + + IAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)|. \n + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + \param[in] n + n is INTEGER\n + number of elements in input vector(s) + + \param[in] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, + dimension ( 1 + ( n - 1 )*abs( incx ) ) \n + single precision vector with n elements + + \param[in] incx + incx is INTEGER\n + storage spacing between elements of x + + \return Index of infinity-norm of vector + INTEGER + */ +template< typename T > +int64_t iamax( + int64_t n, + T const *x, int64_t incx ) +{ + return cblas_iamax( n, x, incx ); +} + +/*! \brief Solve General matrix-vector multiply for arbitrary data types + + \b Purpose: + + GEMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be used as follows: \n + trans = CBLAS_TRANSPOSE::CblasNoTrans,y := alpha*A*x + beta*y. \n + trans = CBLAS_TRANSPOSE::CblasTrans, y := alpha*A**T*x + beta*y. \n + trans = CBLAS_TRANSPOSE::CblasConjTrans, y := alpha*A**T*x + beta*y. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix A. + m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + m-by-n , stored in an lda-by-n array [RowMajor: m-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda >= max(1, m) [RowMajor: lda >= max(1, n)]. + + \param[in] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : \n + If trans = CblasNoTrans: + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Otherwise: + at least ( 1 + ( m - 1 )*abs( incx ) ). + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] beta + beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then y need not be set on input. + + \param[in,out] y + y is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension : \n + If trans = CblasNoTrans: + at least ( 1 + ( m - 1 )*abs( incy ) ). \n + Otherwise: + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry with beta non-zero, the incremented array y + must contain the vector y. On exit, y is overwritten by the + updated vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + */ +template< typename T > +void gemv( + CBLAS_ORDER layout, + CBLAS_TRANSPOSE trans, + int64_t m, int64_t n, + T alpha, + T const *A, int64_t lda, + T const *x, int64_t incx, + T beta, + T *y, int64_t incy ) +{ + cblas_gemv(layout, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +/*! \brief Solve General matrix-vector multiply for arbitrary data types + + \b Purpose: + + GBMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or + + y := alpha*A**H*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix with kl sub-diagonals and ku super-diagonals. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be used as follows: \n + trans = CBLAS_TRANSPOSE::CblasNoTrans,y := alpha*A*x + beta*y. \n + trans = CBLAS_TRANSPOSE::CblasTrans, y := alpha*A**T*x + beta*y. \n + trans = CBLAS_TRANSPOSE::CblasConjTrans, y := alpha*A**H*x + beta*y. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix A. + m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix A. + n must be at least zero. + + \param[in] kl + kl is INTEGER + On entry, kl specifies the number of sub-diagonals of the matrix A. + kl must be at least zero. + + \param[in] ku + ku is INTEGER + On entry, ku specifies the number of super-diagonals of the matrix A. + ku must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension lda-by-n. + Before entry, the leading ( kl + ku + 1 ) by n part of the + array A must contain the matrix of coefficients, supplied + column by column, with the leading diagonal of the matrix in + row ( ku + 1 ) of the array, the first super-diagonal + starting at position 2 in row ku, the first sub-diagonal + starting at position 1 in row ( ku + 2 ), and so on. + Elements in the array A that do not correspond to elements + in the band matrix (such as the top left ku by ku triangle) + are not referenced. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda >= ( kl + ku + 1 ) + + \param[in] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : \n + If trans = CblasNoTrans: + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Otherwise: + at least ( 1 + ( m - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] beta + beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then y need not be set on input. + + \param[in,out] y + y is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension : \n + If trans = CblasNoTrans: + at least ( 1 + ( m - 1 )*abs( incy ) ). \n + Otherwise: + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry with beta non-zero, the incremented array y + must contain the vector y. On exit, y is overwritten by the + updated vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + */ +template< typename T > +void gbmv( + CBLAS_ORDER layout, + CBLAS_TRANSPOSE trans, + int64_t m, int64_t n, + int64_t kl, int64_t ku, + T alpha, + T const *A, int64_t lda, + T const *x, int64_t incx, + T beta, + T *y, int64_t incy ) +{ + cblas_gbmv(layout, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); +} + +/*! \brief Solves Hermitian matrix-vector multiply for arbitrary data types + + \b Purpose: + + HEMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION COMPLEX, + DOUBLE PRECISION COMPLEX(COMPLEX*16) + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n hermitian matrix. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] alpha + alpha is COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is COMPLEX/COMPLEX*16 array,dimension lda-by-n. \n + Before entry with UPLO = CblasUpper, the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = CblasLower, the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. \n + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, n ). + + \param[in] x + x is COMPLEX/COMPLEX*16 array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] beta + beta is COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then y need not be set on input. + + \param[in,out] y + y is COMPLEX/COMPLEX*16 array, dimension : \n + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry with beta non-zero, the incremented array y + must contain the vector y. On exit, y is overwritten by the + updated vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + */ +template< typename T > +void hemv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *A, int64_t lda, + T const *x, int64_t incx, + T beta, + T *y, int64_t incy ) +{ + cblas_hemv(layout, uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +/*! \brief Solves Hermitian matrix-vector multiply for arbitrary data types + + \b Purpose: + + HBMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION COMPLEX, + DOUBLE PRECISION COMPLEX(COMPLEX*16) + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n hermitian matrix with k super-diagonals. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the the upper or lower triangular + part of the band matrix A is being supplied as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] k + k is INTEGER + On entry, k specifies the number of super-diagonals of the matrix A. + k must be at least zero. + + \param[in] alpha + alpha is COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is COMPLEX/COMPLEX*16 array,dimension lda-by-n. \n + Before entry with UPLO = CblasUpper, the leading ( k + 1 ) + by n part of the array A must contain the upper triangular + band part of the hermitian matrix, supplied column by + column, with the leading diagonal of the matrix in row + ( k + 1 ) of the array, the first super-diagonal starting at + position 2 in row k, and so on. The top left k by k triangle + of the array A is not referenced. \n + Before entry with UPLO = CblasLower, the leading ( k + 1 ) + by n part of the array A must contain the lower triangular + band part of the hermitian matrix, supplied column by + column, with the leading diagonal of the matrix in row 1 of + the array, the first sub-diagonal starting at position 1 in + row 2, and so on. The bottom right k by k triangle of the + array A is not referenced. \n + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least ( k + 1 ). + + \param[in] x + x is COMPLEX/COMPLEX*16 array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] beta + beta is COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar alpha. + + \param[in,out] y + y is COMPLEX/COMPLEX*16 array, dimension : \n + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry with beta non-zero, the incremented array y + must contain the vector y. On exit, y is overwritten by the + updated vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + */ +template< typename T > +void hbmv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, int64_t k, + T alpha, + T const *A, int64_t lda, + T const *x, int64_t incx, + T beta, + T *y, int64_t incy ) +{ + cblas_hbmv(layout, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +/*! \brief Solves Hermitian matrix-vector multiply for arbitrary data types + + \b Purpose: + + HPMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION COMPLEX, + DOUBLE PRECISION COMPLEX(COMPLEX*16) + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n hermitian matrix, supplied in packed form. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the the upper or lower triangular + part of the band matrix A is supplied in the packed array Ap as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] alpha + alpha is COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] Ap + Ap is COMPLEX/COMPLEX*16 array,dimension atleast ( ( n*( n + 1 ) )/2 ). \n + Before entry with UPLO = CblasUpper, the array Ap must + contain the upper triangular part of the hermitian matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 1, 2 ) + and a( 2, 2 ) respectively, and so on. \n + Before entry with UPLO = CblasLower, the array Ap must + contain the lower triangular part of the hermitian matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 2, 1 ) + and a( 3, 1 ) respectively, and so on. \n + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + + \param[in] x + x is COMPLEX/COMPLEX*16 array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] beta + beta is COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then y need not be set on input. + + \param[in,out] y + y is COMPLEX/COMPLEX*16 array, dimension : \n + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry with beta non-zero, the incremented array y + must contain the vector y. On exit, y is overwritten by the + updated vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + */ +template< typename T > +void hpmv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *Ap, + T const *x, int64_t incx, + T beta, + T *y, int64_t incy ) +{ + cblas_hpmv(layout, uplo, n, alpha, Ap, x, incx, beta, y, incy); +} + +/*! \brief Solves Symmetric matrix-vector multiply for arbitrary data types + + \b Purpose: + + SYMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is SINGLE/DOUBLE PRECISION REAL array,dimension lda-by-n. \n + Before entry with UPLO = CblasUpper, the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = CblasLower, the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. \n + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, n ). + + \param[in] x + x is SINGLE/DOUBLE PRECISION REAL array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] beta + beta is SINGLE/DOUBLE PRECISION REAL + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then y need not be set on input. + + \param[in,out] y + y is SINGLE/DOUBLE PRECISION REAL array, dimension : \n + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry with beta non-zero, the incremented array y + must contain the vector y. On exit, y is overwritten by the + updated vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + */ +template< typename T > +void symv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *A, int64_t lda, + T const *x, int64_t incx, + T beta, + T *y, int64_t incy ) +{ + cblas_symv(layout, uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +/*! \brief Solves symmetric matrix-vector multiply for arbitrary data types + + \b Purpose: + + SBMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix with k super-diagonals. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the the upper or lower triangular + part of the band matrix A is being supplied as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] k + k is INTEGER + On entry, k specifies the number of super-diagonals of the matrix A. + k must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is SINGLE/DOUBLE PRECISION REAL array,dimension lda-by-n. \n + Before entry with UPLO = CblasUpper, the leading ( k + 1 ) + by n part of the array A must contain the upper triangular + band part of the symmetric matrix, supplied column by + column, with the leading diagonal of the matrix in row + ( k + 1 ) of the array, the first super-diagonal starting at + position 2 in row k, and so on. The top left k by k triangle + of the array A is not referenced. \n + Before entry with UPLO = CblasLower, the leading ( k + 1 ) + by n part of the array A must contain the lower triangular + band part of the symmetric matrix, supplied column by + column, with the leading diagonal of the matrix in row 1 of + the array, the first sub-diagonal starting at position 1 in + row 2, and so on. The bottom right k by k triangle of the + array A is not referenced. \n + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least ( k + 1 ). + + \param[in] x + x is SINGLE/DOUBLE PRECISION REAL array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] beta + beta is SINGLE/DOUBLE PRECISION REAL + On entry, beta specifies the scalar alpha. + + \param[in,out] y + y is SINGLE/DOUBLE PRECISION REAL array, dimension : \n + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry with beta non-zero, the incremented array y + must contain the vector y. On exit, y is overwritten by the + updated vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + */ +template< typename T > +void sbmv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, int64_t k, + T alpha, + T const *A, int64_t lda, + T const *x, int64_t incx, + T beta, + T *y, int64_t incy ) +{ + cblas_sbmv(layout, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +/*! \brief Solves symmetric matrix-vector multiply for arbitrary data types + + \b Purpose: + + SPMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix, supplied in packed form. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the the upper or lower triangular + part of the band matrix A is supplied in the packed array Ap as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] Ap + Ap is SINGLE/DOUBLE PRECISION REAL array,dimension atleast ( ( n*( n + 1 ) )/2 ). \n + Before entry with UPLO = CblasUpper, the array Ap must + contain the upper triangular part of the symmetric matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 1, 2 ) + and a( 2, 2 ) respectively, and so on. \n + Before entry with UPLO = CblasLower, the array Ap must + contain the lower triangular part of the symmetric matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 2, 1 ) + and a( 3, 1 ) respectively, and so on. \n + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + + \param[in] x + x is SINGLE/DOUBLE PRECISION REAL array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] beta + beta is SINGLE/DOUBLE PRECISION REAL + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then y need not be set on input. + + \param[in,out] y + y is SINGLE/DOUBLE PRECISION REAL array, dimension : \n + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry with beta non-zero, the incremented array y + must contain the vector y. On exit, y is overwritten by the + updated vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + */ +template< typename T > +void spmv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *Ap, + T const *x, int64_t incx, + T beta, + T *y, int64_t incy ) +{ + cblas_spmv(layout, uplo, n, alpha, Ap, x, incx, beta, y, incy); +} + +/*! \brief Solve the one of the matrix-vector operations for arbitrary data types + + \b Purpose: + + TRMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + x := A*x, or x := A**T*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be performed as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, x := A*x. \n + trans = CBLAS_TRANSPOSE::CblasTrans, x := A**T*x. \n + trans = CBLAS_TRANSPOSE::CblasConjTrans, x := A**T*x. + + \param[in] diag + diag is enum CBLAS_DIAG + diag specifies specifies whether or not A is unit triangular + as follows: \n + diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular.\n + diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit + triangular. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension ( lda, n )\n + Before entry with UPLO = CblasUpper, the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. \n + Before entry with UPLO = CblasLower, the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. \n + Note that when DIAG = CblasUnit, the diagonal elements of + A are not referenced either, but are assumed to be unity. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, n ). + + \param[in, out] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x.On exit, x is overwritten with the transformed vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + */ +template< typename T > +void trmv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + CBLAS_DIAG diag, + int64_t n, + T const *A, int64_t lda, + T *x, int64_t incx ) +{ + cblas_trmv(layout, uplo, trans, diag, n, A, lda, x, incx); +} + +/*! \brief Solve the one of the matrix-vector operations for arbitrary data types + + \b Purpose: + + TBMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + x := A*x, or x := A**T*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular band matrix, with ( k + 1 ) diagonals. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be performed as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, x := A*x. \n + trans = CBLAS_TRANSPOSE::CblasTrans, x := A**T*x. \n + trans = CBLAS_TRANSPOSE::CblasConjTrans, x := A**T*x. + + \param[in] diag + diag is enum CBLAS_DIAG + diag specifies specifies whether or not A is unit triangular + as follows: \n + diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular.\n + diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit + triangular. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] k + k is INTEGER + On entry with UPLO = CblasUpper, k specifies the number of + super-diagonals of the matrix A. + On entry with UPLO = CblasLower, k specifies the number of + sub-diagonals of the matrix A. + k must at least zero. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension ( lda, n )\n + Before entry with UPLO = CblasUpper, the leading ( k + 1 ) + by n part of the array A must contain the upper triangular + band part of the matrix of coefficients, supplied column by + column, with the leading diagonal of the matrix in row + ( k + 1 ) of the array, the first super-diagonal starting at + position 2 in row k, and so on. The top left k by k triangle + of the array A is not referenced. \n + Before entry with UPLO = CblasLower, the leading ( k + 1 ) + by n part of the array A must contain the lower triangular + band part of the matrix of coefficients, supplied column by + column, with the leading diagonal of the matrix in row 1 of + the array, the first sub-diagonal starting at position 1 in + row 2, and so on. The bottom right k by k triangle of the + array A is not referenced. \n + Note that when DIAG = CblasUnit the elements of the array A + corresponding to the diagonal elements of the matrix are not + referenced, but are assumed to be unity. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, ( k + 1 ) ). + + \param[in, out] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x.On exit, x is overwritten with the transformed vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + */ +template< typename T > +void tbmv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + CBLAS_DIAG diag, + int64_t n, int64_t k, + T const *A, int64_t lda, + T *x, int64_t incx ) +{ + cblas_tbmv(layout, uplo, trans, diag, n, k, A, lda, x, incx); +} + + +/*! \brief Solve the one of the matrix-vector operations for arbitrary data types + + \b Purpose: + + TPMV performs one of the matrix-vector operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + x := A*x, or x := A**T*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix, supplied in packed form. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be performed as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, x := A*x. \n + trans = CBLAS_TRANSPOSE::CblasTrans, x := A**T*x. \n + trans = CBLAS_TRANSPOSE::CblasConjTrans, x := A**T*x. + + \param[in] diag + diag is enum CBLAS_DIAG + diag specifies specifies whether or not A is unit triangular + as follows: \n + diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular.\n + diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit + triangular. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] Ap + Ap is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension + ( ( n*( n + 1 ) )/2 ). \n + Before entry with UPLO = CblasUpper, the array Ap must + contain the upper triangular matrix packed sequentially, + column by column, so that Ap( 1 ) contains a( 1, 1 ), + Ap( 2 ) and Ap( 3 ) contain a( 1, 2 ) and a( 2, 2 ) + respectively, and so on. \n + Before entry with UPLO = CblasLower, the array Ap must + contain the lower triangular matrix packed sequentially, + column by column, so that Ap( 1 ) contains a( 1, 1 ), + Ap( 2 ) and Ap( 3 ) contain a( 2, 1 ) and a( 3, 1 ) + respectively, and so on. \n + Note that when DIAG = CblasUnit, the diagonal elements of + A are not referenced, but are assumed to be unity. + + \param[in, out] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : \n + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + vector x.On exit, x is overwritten with the transformed vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + */ +template< typename T > +void tpmv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + CBLAS_DIAG diag, + int64_t n, + T const *Ap, + T *x, int64_t incx ) +{ + cblas_tpmv(layout, uplo, trans, diag, n, Ap, x, incx); +} + +/*! \brief Solve the one of the triangular matrix-vector equation for arbitrary data types + + \b Purpose: + + TRSV solves one of the systems of equations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A*x = b, or A**T*x = b, + + where b and x are n element vectors and A is an n by n unit, or + non-unit, upper or lower triangular matrix + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be performed as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, A*x = b. \n + trans = CBLAS_TRANSPOSE::CblasTrans, A**T*x = b. \n + trans = CBLAS_TRANSPOSE::CblasConjTrans, A**T*x = b. + + \param[in] diag + diag is enum CBLAS_DIAG + diag specifies specifies whether or not A is unit triangular + as follows: \n + diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular.\n + diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit + triangular. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension ( lda, n )\n + Before entry with UPLO = CblasUpper, the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. \n + Before entry with UPLO = CblasLower, the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. \n + Note that when DIAG = CblasUnit, the diagonal elements of + A are not referenced either, but are assumed to be unity. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, n ). + + \param[in, out] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + element right-hand side vector b.On exit, x is overwritten + with the transformed vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + */ +template< typename T > +void trsv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + CBLAS_DIAG diag, + int64_t n, + T const *A, int64_t lda, + T *x, int64_t incx ) +{ + cblas_trsv(layout, uplo, trans, diag, n, A, lda, x, incx); +} + +/*! \brief Solve the one of the triangular matrix-vector equation for arbitrary data types + + \b Purpose: + + TBSV solves one of the systems of equations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A*x = b, or A**T*x = b, + + where b and x are n element vectors and A is an n by n unit, or + non-unit, upper or lower triangular band matrix, with ( k + 1 ) + diagonals. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be performed as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, A*x = b. \n + trans = CBLAS_TRANSPOSE::CblasTrans, A**T*x = b. \n + trans = CBLAS_TRANSPOSE::CblasConjTrans, A**T*x = b. + + \param[in] diag + diag is enum CBLAS_DIAG + diag specifies specifies whether or not A is unit triangular + as follows: \n + diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular.\n + diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit + triangular. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] k + k is INTEGER + On entry with UPLO = CblasUpper, k specifies the number of + super-diagonals of the matrix A. + On entry with UPLO = CblasLower, k specifies the number of + sub-diagonals of the matrix A. + k must at least zero. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension ( lda, n )\n + Before entry with UPLO = CblasUpper, the leading ( k + 1 ) + by n part of the array A must contain the upper triangular + band part of the matrix of coefficients, supplied column by + column, with the leading diagonal of the matrix in row + ( k + 1 ) of the array, the first super-diagonal starting at + position 2 in row k, and so on. The top left k by k triangle + of the array A is not referenced. \n + Before entry with UPLO = CblasLower, the leading ( k + 1 ) + by n part of the array A must contain the lower triangular + band part of the matrix of coefficients, supplied column by + column, with the leading diagonal of the matrix in row 1 of + the array, the first sub-diagonal starting at position 1 in + row 2, and so on. The bottom right k by k triangle of the + array A is not referenced. \n + Note that when DIAG = CblasUnit, the elements of the array A + corresponding to the diagonal elements of the matrix are not + referenced, but are assumed to be unity. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, k+1 ). + + \param[in, out] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + element right-hand side vector b.On exit, x is overwritten + with the solution vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + */ +template< typename T > +void tbsv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + CBLAS_DIAG diag, + int64_t n, int64_t k, + T const *A, int64_t lda, + T *x, int64_t incx ) +{ + cblas_tbsv(layout, uplo, trans, diag, n, k, A, lda, x, incx); +} + + +/*! \brief Solve the one of the triangular matrix-vector equation for arbitrary data types + + \b Purpose: + + TPSV solves one of the systems of equations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A*x = b, or A**T*x = b, + + where b and x are n element vectors and A is an n by n unit, or + non-unit, upper or lower triangular band matrix, supplied in packed form. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be performed as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, A*x = b. \n + trans = CBLAS_TRANSPOSE::CblasTrans, A**T*x = b. \n + trans = CBLAS_TRANSPOSE::CblasConjTrans, A**T*x = b. + + \param[in] diag + diag is enum CBLAS_DIAG + diag specifies specifies whether or not A is unit triangular + as follows: \n + diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular.\n + diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit + triangular. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A.n must be at least zero. + + \param[in] Ap + Ap is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension + ( ( n*( n + 1 ) )/2 ). \n + Before entry with UPLO = CblasUpper, the array Ap must + contain the upper triangular matrix packed sequentially, + column by column, so that Ap( 1 ) contains a( 1, 1 ), + Ap( 2 ) and Ap( 3 ) contain a( 1, 2 ) and a( 2, 2 ) + respectively, and so on. \n + Before entry with UPLO = CblasLower, the array Ap must + contain the lower triangular matrix packed sequentially, + column by column, so that Ap( 1 ) contains a( 1, 1 ), + Ap( 2 ) and Ap( 3 ) contain a( 2, 1 ) and a( 3, 1 ) + respectively, and so on. \n + Note that when DIAG = CblasUnit, the diagonal elements of + A are not referenced, but are assumed to be unity. + + \param[in, out] x + x is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the + element right-hand side vector b.On exit, x is overwritten + with the solution vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + */ +template< typename T > +void tpsv( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + CBLAS_DIAG diag, + int64_t n, + T const *Ap, + T *x, int64_t incx ) +{ + cblas_tpsv(layout, uplo, trans, diag, n, Ap, x, incx); +} + +/*! \brief Perform the General matrix rank-1 update for arbitrary data types + + \b Purpose: + + GER performs the rank 1 operation for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + + A := alpha*x*y**T + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix A. + m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is REAL/DOUBLE PRECISION array,dimension : + at least ( 1 + ( m - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the m + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] y + y is REAL/DOUBLE PRECISION array,dimension : + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry, the incremented array y must contain the n + element vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + + \param[in,out] A + A is REAL/DOUBLE PRECISION array,dimension ( lda, n )\n + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, m ). + */ +template< typename T > +void ger( + CBLAS_ORDER layout, + int64_t m, int64_t n, + T alpha, + T const *x, int64_t incx, + T const *y, int64_t incy, + T *A, int64_t lda ) +{ + cblas_ger(layout, m, n, alpha, x, incx, y, incy, A, lda); +} + +/*! \brief Perform the General matrix rank-1 update for arbitrary data types + + \b Purpose: + + GERU performs the rank 1 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A := alpha*x*y**T + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix A. + m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION COMPLEX + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( m - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the m + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] y + y is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry, the incremented array y must contain the n + element vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + + \param[in,out] A + A is SINGLE/DOUBLE PRECISION COMPLEX array,dimension ( lda, n )\n + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, m ). + */ +template< typename T > +void geru( + CBLAS_ORDER layout, + int64_t m, int64_t n, + T alpha, + T const *x, int64_t incx, + T const *y, int64_t incy, + T *A, int64_t lda ) +{ + cblas_geru(layout, m, n, alpha, x, incx, y, incy, A, lda); +} + +/*! \brief Perform the General matrix rank-1 update for arbitrary data types + + \b Purpose: + + GERC performs the rank 1 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A := alpha*x*y**T + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix A. + m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION COMPLEX + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( m - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the m + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] y + y is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry, the incremented array y must contain the n + element vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + + \param[in,out] A + A is SINGLE/DOUBLE PRECISION COMPLEX array,dimension ( lda, n )\n + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, m ). + */ +template< typename T > +void gerc( + CBLAS_ORDER layout, + int64_t m, int64_t n, + T alpha, + T const *x, int64_t incx, + T const *y, int64_t incy, + T *A, int64_t lda ) +{ + cblas_gerc(layout, m, n, alpha, x, incx, y, incy, A, lda); +} + +/*! \brief Perform the hermitian rank 1 operation for arbitrary data types + + \b Purpose: + + HER performs the hermitian rank 1 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A := alpha*x*x**H + A, + + where alpha is a real scalar, x is an n element vector, A is an n by n + hermitian matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the upper or lower triangular + part of the array A is to be referenced as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the n + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in,out] A + A is SINGLE/DOUBLE PRECISION COMPLEX array,dimension ( lda, n )\n + Before entry with UPLO = CblasUpper, the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. \n + Before entry with UPLO = CblasLower, the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. \n + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, n ). + */ +template< typename T > +void her( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + real_type alpha, // zher takes double alpha; use real + T const *x, int64_t incx, + T *A, int64_t lda ) +{ + cblas_her(layout, uplo, n, alpha, x, incx, A, lda); +} + +/*! \brief Perform the hermitian rank 1 operation for arbitrary data types + + \b Purpose: + + HPR performs the hermitian rank 1 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A := alpha*x*x**H + A, + + where alpha is a real scalar, x is an n element vector, A is an n by n + hermitian matrix, supplied in packed form. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the upper or lower triangular + part of the array A is to be referenced as follows: \n + uplo = CBLAS_UPLO::CblasUpper The upper triangular part of A is + supplied in Ap. \n + uplo = CBLAS_UPLO::CblasLower The lower triangular part of A is + supplied in Ap. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the n + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in,out] Ap + Ap is SINGLE/DOUBLE PRECISION COMPLEX array,dimension + atleast ( ( n*( n + 1 ) )/2 ).\n + Before entry with UPLO = CblasUpper, the array Ap must + contain the upper triangular part of the hermitian matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 1, 2 ) + and a( 2, 2 ) respectively, and so on. On exit, the array + Ap is overwritten by the upper triangular part of the + updated matrix. \n + Before entry with UPLO = CblasLower, the array Ap must + contain the lower triangular part of the hermitian matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 2, 1 ) + and a( 3, 1 ) respectively, and so on. On exit, the array + Ap is overwritten by the lower triangular part of the + updated matrix. \n + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + */ +template< typename T > +void hpr( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + real_type alpha, // zher takes double alpha; use real + T const *x, int64_t incx, + T *Ap ) +{ + cblas_hpr(layout, uplo, n, alpha, x, incx, Ap); +} + +/*! \brief Perform the hermitian rank 2 operation for arbitrary data types + + \b Purpose: + + HER2 performs the hermitian rank 2 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + + where alpha is a scalar, x and y are n element vector, A is an n by n + hermitian matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies whether the upper or lower triangular part of the + array A is to be referenced as follows: \n + UPLO = CblasUpper Only the upper triangular part of A + is to be referenced. \n + UPLO = CblasLower Only the lower triangular part of A + is to be referenced. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION COMPLEX + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the n + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] y + y is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry, the incremented array y must contain the n + element vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + + \param[in,out] A + A is SINGLE/DOUBLE PRECISION COMPLEX array,dimension ( lda, n )\n + Before entry with UPLO = CblasUpper, the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. \n + Before entry with UPLO = CblasLower, the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. \n + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, n ). + */ +template< typename T > +void her2( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *x, int64_t incx, + T const *y, int64_t incy, + T *A, int64_t lda ) +{ + cblas_her2(layout, uplo, n, alpha, x, incx, y, incy, A, lda); +} + +/*! \brief Perform the hermitian rank 2 operation for arbitrary data types + + \b Purpose: + + HPR2 performs the hermitian rank 2 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION COMPLEX(COMPLEX*16) + + A := alpha*x*y**H + conjg( alpha )*y*x**H + A, + + where alpha is a scalar, x and y are n element vector, A is an n by n + hermitian matrix, supplied in packed form. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the upper or lower triangular + part of the array A is to be referenced as follows: \n + uplo = CBLAS_UPLO::CblasUpper The upper triangular part of A is + supplied in Ap. \n + uplo = CBLAS_UPLO::CblasLower The lower triangular part of A is + supplied in Ap. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION COMPLEX + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION COMPLEX array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the n + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] y + y is SINGLE/DOUBLE PRECISION REAL array,dimension : + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry, the incremented array y must contain the n + element vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + + \param[in,out] Ap + Ap is SINGLE/DOUBLE PRECISION COMPLEX array,dimension + atleast ( ( n*( n + 1 ) )/2 ).\n + Before entry with UPLO = CblasUpper, the array Ap must + contain the upper triangular part of the hermitian matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 1, 2 ) + and a( 2, 2 ) respectively, and so on. On exit, the array + Ap is overwritten by the upper triangular part of the + updated matrix. \n + Before entry with UPLO = CblasLower, the array Ap must + contain the lower triangular part of the hermitian matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 2, 1 ) + and a( 3, 1 ) respectively, and so on. On exit, the array + Ap is overwritten by the lower triangular part of the + updated matrix. \n + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + */ +template< typename T > +void hpr2( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *x, int64_t incx, + T const *y, int64_t incy, + T *Ap ) +{ + cblas_hpr2(layout, uplo, n, alpha, x, incx, y, incy, Ap); +} + +/*! \brief Perform the symmetric rank 1 operation for arbitrary data types + + \b Purpose: + + SYR performs the symmetric rank 1 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION REAL + + A := alpha*x*x**T + A, + + where alpha is a real scalar, x is an n element vector, A is an n by n + symmetric matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the upper or lower triangular + part of the array A is to be referenced as follows: \n + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. \n + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION REAL array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the n + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in,out] A + A is SINGLE/DOUBLE PRECISION REAL array,dimension ( lda, n )\n + Before entry with UPLO = CblasUpper, the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. \n + Before entry with UPLO = CblasLower, the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. \n + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, n ). + */ +template< typename T > +void syr( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *x, int64_t incx, + T *A, int64_t lda ) +{ + cblas_syr(layout, uplo, n, alpha, x, incx, A, lda); +} + +/*! \brief Perform the symmetric rank 1 operation for arbitrary data types + + \b Purpose: + + SPR performs the symmetric rank 1 operation for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL + + A := alpha*x*x**T + A, + + where alpha is a real scalar, x is an n element vector, A is an n by n + symmetric matrix, supplied in packed form. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the upper or lower triangular + part of the array A is to be referenced as follows: \n + uplo = CBLAS_UPLO::CblasUpper The upper triangular part of A is + supplied in Ap. \n + uplo = CBLAS_UPLO::CblasLower The lower triangular part of A is + supplied in Ap. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION REAL array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the n + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in,out] Ap + Ap is SINGLE/DOUBLE PRECISION REAL array,dimension + atleast ( ( n*( n + 1 ) )/2 ).\n + Before entry with UPLO = CblasUpper, the array Ap must + contain the upper triangular part of the symmetric matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 1, 2 ) + and a( 2, 2 ) respectively, and so on. On exit, the array + Ap is overwritten by the upper triangular part of the + updated matrix. \n + Before entry with UPLO = CblasLower, the array Ap must + contain the lower triangular part of the symmetric matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 2, 1 ) + and a( 3, 1 ) respectively, and so on. On exit, the array + Ap is overwritten by the lower triangular part of the + updated matrix. \n + */ +template< typename T > +void spr( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *x, int64_t incx, + T *Ap ) +{ + cblas_spr(layout, uplo, n, alpha, x, incx, Ap); +} + +/*! \brief Perform the symmetric rank 2 operation for arbitrary data types + + \b Purpose: + + SYR2 performs the symmetric rank 2 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION REAL + + A := alpha*x*y**T + alpha*y*x**T + A, + + where alpha is a scalar, x and y are n element vector, A is an n by n + symmetric matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies whether the upper or lower triangular part of the + array A is to be referenced as follows: \n + UPLO = CblasUpper Only the upper triangular part of A + is to be referenced. \n + UPLO = CblasLower Only the lower triangular part of A + is to be referenced. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION REAL array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the n + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] y + y is SINGLE/DOUBLE PRECISION REAL array,dimension : + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry, the incremented array y must contain the n + element vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + + \param[in,out] A + A is SINGLE/DOUBLE PRECISION REAL array,dimension ( lda, n )\n + Before entry with UPLO = CblasUpper, the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. \n + Before entry with UPLO = CblasLower, the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. \n + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + lda must be at least max( 1, n ). + */ +template< typename T > +void syr2( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *x, int64_t incx, + T const *y, int64_t incy, + T *A, int64_t lda ) +{ + cblas_syr2(layout, uplo, n, alpha, x, incx, y, incy, A, lda); +} + +/*! \brief Perform the symmetric rank 2 operation for arbitrary data types + + \b Purpose: + + SPR2 performs the symmetric rank 2 operation for arbitrary data types + Data precisions supported include SINGLE/DOUBLE PRECISION REAL + + A := alpha*x*y**T + alpha*y*x**T + A, + + where alpha is a scalar, x and y are n element vector, A is an n by n + symmetric matrix, supplied in packed form. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO. + uplo specifies specifies whether the upper or lower triangular + part of the array A is to be referenced as follows: \n + uplo = CBLAS_UPLO::CblasUpper The upper triangular part of A is + supplied in Ap. \n + uplo = CBLAS_UPLO::CblasLower The lower triangular part of A is + supplied in Ap. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix A. + n must be at least zero. + + \param[in] alpha + alpha is SINGLE/DOUBLE PRECISION REAL + On entry, alpha specifies the scalar alpha. + + \param[in] x + x is SINGLE/DOUBLE PRECISION REAL array,dimension : + at least ( 1 + ( n - 1 )*abs( incx ) ). \n + Before entry, the incremented array x must contain the n + element vector x. + + \param[in] incx + incx is INTEGER + On entry, incx specifies the increment for the elements of x. + incx must not be zero. + + \param[in] y + y is SINGLE/DOUBLE PRECISION REAL array,dimension : + at least ( 1 + ( n - 1 )*abs( incy ) ). \n + Before entry, the incremented array y must contain the n + element vector y. + + \param[in] incy + incy is INTEGER + On entry, incy specifies the increment for the elements of y. + incy must not be zero. + + \param[in,out] Ap + Ap is SINGLE/DOUBLE PRECISION REAL array,dimension + atleast ( ( n*( n + 1 ) )/2 ).\n + Before entry with UPLO = CblasUpper, the array Ap must + contain the upper triangular part of the symmetric matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 1, 2 ) + and a( 2, 2 ) respectively, and so on. On exit, the array + Ap is overwritten by the upper triangular part of the + updated matrix. \n + Before entry with UPLO = CblasLower, the array Ap must + contain the lower triangular part of the symmetric matrix + packed sequentially, column by column, so that Ap( 1 ) + contains a( 1, 1 ), Ap( 2 ) and Ap( 3 ) contain a( 2, 1 ) + and a( 3, 1 ) respectively, and so on. On exit, the array + Ap is overwritten by the lower triangular part of the + updated matrix. \n + */ +template< typename T > +void spr2( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + int64_t n, + T alpha, + T const *x, int64_t incx, + T const *y, int64_t incy, + T *Ap ) +{ + cblas_spr2(layout, uplo, n, alpha, x, incx, y, incy, Ap); +} + +/*! \brief General matrix-matrix multiply for arbitrary data types + + \b Purpose: + + GEMM performs general matrix-matrix multiply for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X**T or op( X ) = X**H, + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] transA + transA is CBLAS_TRANSPOSE + On entry, transA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + transA = CBLAS_TRANSPOSE::CblasNoTrans, op( A ) = A. + transA = CBLAS_TRANSPOSE::CblasTrans, op( A ) = A**T. + transA = CBLAS_TRANSPOSE::CblasConjTrans, op( A ) = A**H. + + \param[in] transB + transB is CBLAS_TRANSPOSE + On entry, transB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + transB = CBLAS_TRANSPOSE::CblasNoTrans, op( B ) = B. + transB = CBLAS_TRANSPOSE::CblasTrans, op( B ) = B**T. + transB = CBLAS_TRANSPOSE::CblasConjTrans, op( B ) = B**H. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix + op( A ) and of the matrix C. m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. n must be + at least zero. + + \param[in] k + k is INTEGER + On entry, k specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). k must + be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + If transA = CblasNoTrans: + m-by-k , stored in an lda-by-k array [RowMajor: m-by-lda]. + Otherwise: + k-by-m , stored in an lda-by-m array [RowMajor: k-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If transA = CblasNoTrans: lda >= max(1, m) [RowMajor: lda >= max(1, k)]. + Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, m)]. + + \param[in] B + B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + If transA = CblasNoTrans: + k-by-n , stored in an ldb-by-n array [RowMajor: k-by-ldb]. + Otherwise: + n-by-k , stored in an ldb-by-k array [RowMajor: n-by-ldb]. + + \param[in] ldb + ldb is INTEGER + On entry, ldb specifies the Leading dimension of B + If transA = CblasNoTrans: ldb >= max(1, k) [RowMajor: ldb >= max(1, n)]. + Otherwise: ldb >= max(1, n) [RowMajor: ldb >= max(1, k)]. + + \param[in] beta + beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then C need not be set on input. + + \param[in,out] C + C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension : + m-by-n stored in an ldc-by-n array [RowMajor: m-by-ldc]. + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + \param[in] ldc + ldc is INTEGER + On entry, ldc specifies the first dimension of C + ldc >= max(1, m) [RowMajor: ldc >= max(1, n)]. + */ +template< typename T > void gemm( - CBLAS_LAYOUT layout, + CBLAS_ORDER layout, CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int64_t m, int64_t n, int64_t k, - scalar_type alpha, - TA const *A, int64_t lda, - TB const *B, int64_t ldb, - scalar_type beta, - TC *C, int64_t ldc ) + T alpha, + T const *A, int64_t lda, + T const *B, int64_t ldb, + T beta, + T *C, int64_t ldc ) { -// printf("In gemm.cpp\n"); cblas_gemm(layout, transA, transB, m, n, k, alpha, A,lda, B, ldb, beta, C, ldc); +} -}; +/*! \brief Solve the triangular matrix-matrix equation for arbitrary data types + + \b Purpose: + + TRSM performs one of the matrix equations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + where op( X ) is one of + + op( A ) = A or op( A ) = A**T or op( A ) = A**H. + + The matrix X is overwritten on B. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] side + side is enum CBLAS_SIDE + side specifies specifies whether op( A ) appears on the left + or right of X as follows: + side = CBLAS_SIDE::CblasLeft op( A )*X = alpha*B. + side = CBLAS_SIDE::CblasRight op( A )*X = alpha*B. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the matrix A is an upper or + lower triangular matrix as follows: + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the form of op( A ) to be used in + the matrix multiplication as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, op( A ) = A. + trans = CBLAS_TRANSPOSE::CblasTrans, op( A ) = A**T. + trans = CBLAS_TRANSPOSE::CblasConjTrans, op( A ) = A**H. + + \param[in] diag + diag is enum CBLAS_DIAG + diag specifies specifies whether or not A is unit triangular + as follows: + diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular. + diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit + triangular. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix + B. m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix + B. n must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + If side = CblasLeft: + the m-by-m matrix A, stored in an lda-by-m array [RowMajor: m-by-lda]. + If side = CblasRight: + the n-by-n matrix A, stored in an lda-by-n array [RowMajor: n-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If side = CblasLeft: lda >= max(1, m) . + If side = CblasRight:lda >= max(1, k) . + + \param[in,out] B + B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + m-by-n , stored in an ldb-by-n array [RowMajor: m-by-ldb]. + on exit is overwritten by the solution matrix X. + + \param[in] ldb + ldb is INTEGER + On entry, ldb specifies the Leading dimension of B + ldb >= max(1, m) [RowMajor: ldb >= max(1, n)]. + */ +template< typename T > +void trsm( + CBLAS_ORDER layout, + CBLAS_SIDE side, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + CBLAS_DIAG diag, + int64_t m, + int64_t n, + T alpha, + T const *A, int64_t lda, + T *B, int64_t ldb ) +{ + cblas_trsm( layout, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); +} +/*! \brief Solve the Triangular matrix-matrix multiply for arbitrary data types + + \b Purpose: + + TRMM performs solves one of the matrix equations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + B := alpha*op( A )*B, or B := alpha*B*op( A ), + + where alpha is a scalar, B is an m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + op( A ) = A or op( A ) = A**T. + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] side + side is enum CBLAS_SIDE + side specifies whether op( A ) multiplies B from left or right of X + as follows: + side = CBLAS_SIDE::CblasLeft B := alpha*op( A )*B. + side = CBLAS_SIDE::CblasRight B := alpha*B*op( A ). + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies whether the matrix A is an upper or lower triangular + matrix as follows: + uplo = CBLAS_UPLO::CblasUpper A is an upper triangular matrix. + uplo = CBLAS_UPLO::CblasLower A is a lower triangular matrix. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the form of op( A ) to be used in + the matrix multiplication as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, op( A ) = A. + trans = CBLAS_TRANSPOSE::CblasTrans, op( A ) = A**T. + trans = CBLAS_TRANSPOSE::CblasConjTrans, op( A ) = A**T. + + \param[in] diag + diag is enum CBLAS_DIAG + diag specifies specifies whether or not A is unit triangular + as follows: + diag = CBLAS_DIAG::CblasUnit A is assumed to be unit triangular. + diag = CBLAS_DIAG::CblasNonUnit A is not assumed to be unit + triangular. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix + B. m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix + B. n must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha.When alpha is + zero then A is not referenced and B need not be set before + entry. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + If side = CblasLeft: + the m-by-m matrix A, stored in an lda-by-m array [RowMajor: m-by-lda]. + If side = CblasRight: + the n-by-n matrix A, stored in an lda-by-n array [RowMajor: n-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If side = CblasLeft: lda >= max(1, m) . + If side = CblasRight:lda >= max(1, n) . + + \param[in,out] B + B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + m-by-n , stored in an ldb-by-n array [RowMajor: m-by-ldb]. + + \param[in] ldb + ldb is INTEGER + On entry, ldb specifies the Leading dimension of B + ldb >= max(1, m) [RowMajor: ldb >= max(1, n)]. + */ +template< typename T > +void trmm( + CBLAS_ORDER layout, + CBLAS_SIDE side, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + CBLAS_DIAG diag, + int64_t m, + int64_t n, + T alpha, + T const *A, int64_t lda, + T *B, int64_t ldb ) +{ + cblas_trmm( layout, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); +} + +/*! \brief Solve the Hermitian matrix-matrix multiply for arbitrary data types + + \b Purpose: + + HEMM performs solves one of the matrix-matrix operations for arbitrary data types + Data precisions supported include SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + C := alpha*A*B + beta*C + + or + + C := alpha*B*A + beta*C, + + where alpha is a scalar, A is an hermitian matrix + C and B are m by n matrices + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] side + side is enum CBLAS_SIDE + side specifies specifies whether the hermitian matrix A + appears on the left or right in the operation as follows: + side = CBLAS_SIDE::CblasLeft C := alpha*A*B + beta*C, + side = CBLAS_SIDE::CblasRight C := alpha*B*A + beta*C + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the upper or lower + triangular part of the hermitian matrix A is to be + referenced as follows: + uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of the + hermitian matrix is to be referenced. + uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of the + hermitian matrix is to be referenced. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix + C. m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix + C. n must be at least zero. + + \param[in] alpha + alpha is COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is COMPLEX/COMPLEX*16 array,dimension : + If side = CblasLeft: + the m-by-m matrix A, stored in an lda-by-m array [RowMajor: m-by-lda]. + If side = CblasRight: + the n-by-n matrix A, stored in an lda-by-n array [RowMajor: n-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If side = CblasLeft: lda >= max(1, m) . + If side = CblasRight:lda >= max(1, k) . + + \param[in] B + B is COMPLEX/COMPLEX*16 array,dimension : + m-by-n , stored in an ldb-by-n array [RowMajor: m-by-ldb]. + + \param[in] ldb + ldb is INTEGER + On entry, ldb specifies the Leading dimension of B + ldb >= max(1, m) [RowMajor: ldb >= max(1, n)]. + + \param[in] beta + beta is COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar beta. + If beta is zero, C need not be set on input + + \param[in,out] C + C is COMPLEX/COMPLEX*16 array,dimension : + m-by-n , stored in an ldc-by-n array [RowMajor: m-by-ldc]. + + \param[in] ldc + ldc is INTEGER + On entry, ldc specifies the Leading dimension of C + ldc >= max(1, m) [RowMajor: ldc >= max(1, n)]. + */ +template< typename T > +void hemm( + CBLAS_ORDER layout, + CBLAS_SIDE side, + CBLAS_UPLO uplo, + int64_t m, int64_t n, + T alpha, + T const *A, int64_t lda, + T const *B, int64_t ldb, + T beta, + T *C, int64_t ldc ) +{ + cblas_hemm( layout, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); +} + +/*! \brief Solve the Symmetric matrix-matrix multiply for arbitrary data types + + \b Purpose: + + SYMM performs solves one of the matrix-matrix operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + C := alpha*A*B + beta*C + + or + + C := alpha*B*A + beta*C, + + where alpha is a scalar, A is an symmetric matrix + C and B are m by n matrices + + \param[in] layout + layout is enum CBLAS_ORDER + layout specifies Matrix storage as follows: + layout = CBLAS_ORDER::CblasRowMajor or Layout::CblasColMajor. + + \param[in] side + side is enum CBLAS_SIDE + side specifies specifies whether the symmetric matrix A + appears on the left or right in the operation as follows: + side = CBLAS_SIDE::CblasLeft C := alpha*A*B + beta*C, + side = CBLAS_SIDE::CblasRight C := alpha*B*A + beta*C + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the upper or lower + triangular part of the symmetric matrix A is to be + referenced as follows: + uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of the + symmetric matrix is to be referenced. + uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of the + symmetric matrix is to be referenced. + + \param[in] m + m is INTEGER + On entry, m specifies the number of rows of the matrix + C. m must be at least zero. + + \param[in] n + n is INTEGER + On entry, n specifies the number of columns of the matrix + C. n must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + If side = CblasLeft: + the m-by-m matrix A, stored in an lda-by-m array [RowMajor: m-by-lda]. + If side = CblasRight: + the n-by-n matrix A, stored in an lda-by-n array [RowMajor: n-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If side = CblasLeft: lda >= max(1, m) . + If side = CblasRight:lda >= max(1, k) . + + \param[in] B + B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + m-by-n , stored in an ldb-by-n array [RowMajor: m-by-ldb]. + + \param[in] ldb + ldb is INTEGER + On entry, ldb specifies the Leading dimension of B + ldb >= max(1, m) [RowMajor: ldb >= max(1, n)]. + + \param[in] beta + beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar beta. + If beta is zero, C need not be set on input + + \param[in, out] C + C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + m-by-n , stored in an ldc-by-n array [RowMajor: m-by-ldc]. + + \param[in] ldc + ldc is INTEGER + On entry, ldc specifies the Leading dimension of C + ldc >= max(1, m) [RowMajor: ldc >= max(1, n)]. + */ +template< typename T > +void symm( + CBLAS_ORDER layout, + CBLAS_SIDE side, + CBLAS_UPLO uplo, + int64_t m, int64_t n, + T alpha, + T const *A, int64_t lda, + T const *B, int64_t ldb, + T beta, + T *C, int64_t ldc ) +{ + cblas_symm( layout, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); +} + +/*! \brief Solve the Symmetric rank-k operations for arbitrary data types + + \b Purpose: + + SYRK performs one of the symmetric rank k operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + C := alpha*A*A**T + beta*C, + + or + + C := alpha*A**T*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the upper or lower + triangular part of the array C is to be referenced + as follows: + uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of C + is to be referenced. + uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of C + is to be referenced. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be used as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans,C := alpha*A*A**T + beta*C. + trans = CBLAS_TRANSPOSE::CblasTrans,C := alpha*A**T*A + beta*C. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix C. n must be + at least zero. + + \param[in] k + k is INTEGER + If trans = CblasNoTrans: k is number of columns of the matrix A. + Otherwise: k is number of rows of the matrix A. + k must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + If transA = CblasNoTrans: + n-by-k , stored in an lda-by-k array [RowMajor: n-by-lda]. + Otherwise: + k-by-n , stored in an lda-by-n array [RowMajor: k-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If transA = CblasNoTrans: lda >= max(1, n) [RowMajor: lda >= max(1, k)]. + Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, n)]. + + \param[in] beta + beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then C need not be set on input. + + \param[in,out] C + C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension : + The n-by-n symmetric matrix C, + stored in an ldc-by-n array [RowMajor: n-by-ldc]. + On exit, the array C is overwritten by the lower/upper + triangular part of the updated matrix. + + \param[in] ldc + ldc is INTEGER + On entry, ldc specifies the first dimension of C + ldc >= max(1, n) + */ +template< typename T > +void syrk( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + int64_t n, int64_t k, + T alpha, + T const *A, int64_t lda, + T beta, + T *C, int64_t ldc ) +{ + cblas_syrk( layout, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +/*! \brief Solve the Symmetric rank 2k operations for arbitrary data types + + \b Purpose: + + SYR2K performs one of the symmetric rank 2k operations for arbitrary data types + Data precisions supported include SINGLE PRECISION REAL, DOUBLE PRECISION REAL, + SINGLE PRECISION COMPLEX, DOUBLE PRECISION COMPLEX(COMPLEX*16) + + C := alpha*A*B**T + alpha*B*A**T + beta*C, + + or + + C := alpha*A**T*B + alpha*B**T*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A and B are n by k matrices in the first case and k by n matrices + in the second case. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the upper or lower + triangular part of the array C is to be referenced + as follows: + uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of C + is to be referenced. + uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of C + is to be referenced. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be used as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans,C := alpha*A*B**T + alpha*B*A**T + beta*C. + trans = CBLAS_TRANSPOSE::CblasTrans, C := alpha*A**T*B + alpha*B**T*A + beta*C. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix C. n must be + at least zero. + + \param[in] k + k is INTEGER + If trans = CblasNoTrans: k is number of columns of the matrices A & B. + Otherwise: k is number of rows of the matrices A & B. + k must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + If trans = CblasNoTrans: + n-by-k , stored in an lda-by-k array [RowMajor: n-by-lda]. + Otherwise: + k-by-n , stored in an lda-by-n array [RowMajor: k-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If trans = CblasNoTrans: lda >= max(1, n) [RowMajor: lda >= max(1, k)]. + Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, n)]. + + \param[in] B + B is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array,dimension : + If trans = CblasNoTrans: + n-by-k , stored in an ldb-by-k array [RowMajor: n-by-ldb]. + Otherwise: + k-by-n , stored in an ldb-by-n array [RowMajor: k-by-ldb] + + \param[in] ldb + ldb is INTEGER + On entry, ldb specifies the Leading dimension of B + If trans = CblasNoTrans: ldb >= max(1, n) [RowMajor: ldb >= max(1, k)]. + Otherwise: ldb >= max(1, k) [RowMajor: ldb >= max(1, n)]. + + \param[in] beta + beta is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then C need not be set on input. + + \param[in,out] C + C is REAL/DOUBLE PRECISION/COMPLEX/COMPLEX*16 array, dimension : + The n-by-n symmetric matrix C, + stored in an ldc-by-n array [RowMajor: n-by-ldc]. + On exit, the array C is overwritten by the lower/upper + triangular part of the updated matrix. + + \param[in] ldc + ldc is INTEGER + On entry, ldc specifies the first dimension of C + ldc >= max(1, n) + */ +template< typename T > +void syr2k( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + int64_t n, int64_t k, + T alpha, + T const *A, int64_t lda, + T const *B, int64_t ldb, + T beta, + T *C, int64_t ldc ) +{ + cblas_syr2k( layout, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc ); +} + +/*! \brief Solve the Hermitian rank k operations for arbitrary data types + + \b Purpose: + + HERK performs one of the hermitian rank k operations for arbitrary data types + Data precisions supported include SINGLE PRECISION COMPLEX, + DOUBLE PRECISION COMPLEX(COMPLEX*16) + + C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C, + + or + + C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C, + + where alpha and beta are real scalars, C is an n by n hermitian + matrix and A is an n by k matrix in the first case and + k by n matrix in the second case. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the upper or lower + triangular part of the array C is to be referenced + as follows: + uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of C + is to be referenced. + uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of C + is to be referenced. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be used as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, C := alpha*A*A**H + beta*C. + trans = CBLAS_TRANSPOSE::CblasConjTrans,C := alpha*A**H*A + beta*C. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix C. n must be + at least zero. + + \param[in] k + k is INTEGER + If trans = CblasNoTrans: k is number of columns of the matrix A. + Otherwise: k is number of rows of the matrix A. + k must be at least zero. + + \param[in] alpha + alpha is REAL/DOUBLE PRECISION + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is COMPLEX/COMPLEX*16 array,dimension : + If trans = CblasNoTrans: + n-by-k , stored in an lda-by-k array [RowMajor: n-by-lda]. + Otherwise: + k-by-n , stored in an lda-by-n array [RowMajor: k-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If trans = CblasNoTrans: lda >= max(1, n) [RowMajor: lda >= max(1, k)]. + Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, n)]. + + \param[in] beta + beta is REAL/DOUBLE PRECISION + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then C need not be set on input. + + \param[in,out] C + C is COMPLEX/COMPLEX*16 array, dimension : + The n-by-n Hermitian matrix C, + stored in an ldc-by-n array [RowMajor: n-by-ldc]. + On exit, the array C is overwritten by the lower/upper + triangular part of the updated matrix. + + \param[in] ldc + ldc is INTEGER + On entry, ldc specifies the first dimension of C + ldc >= max(1, n) + */ +template< typename T > +void herk( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + int64_t n, int64_t k, + real_type alpha, + T const *A, int64_t lda, + real_type beta, + T *C, int64_t ldc ) +{ + cblas_herk( layout, uplo, trans, n, k, alpha, A, lda, beta, C, ldc ); +} + +/*! \brief Solve the Hermitian rank 2k operations for arbitrary data types + + \b Purpose: + + HER2K performs one of the hermitian rank 2k operations for arbitrary data types + Data precisions supported include SINGLE PRECISION COMPLEX, + DOUBLE PRECISION COMPLEX(COMPLEX*16) + + C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C, + + or + + C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C, + + where alpha and beta are scalars with beta real, C is an n by n + hermitian matrix and A and B are n by k matrices in the first case + and k by n matrices in the second case. + + \param[in] layout + layout is enum CBLAS_LAYOUT + layout specifies Matrix storage as follows: + layout = CBLAS_LAYOUT::CblasRowMajor or Layout::CblasColMajor. + + \param[in] uplo + uplo is enum CBLAS_UPLO + uplo specifies specifies whether the upper or lower + triangular part of the array C is to be referenced + as follows: + uplo = CBLAS_UPLO::CblasUpper Only the upper triangular part of C + is to be referenced. + uplo = CBLAS_UPLO::CblasLower Only the lower triangular part of C + is to be referenced. + + \param[in] trans + trans is CBLAS_TRANSPOSE + On entry, trans specifies the operation to be used as follows: + trans = CBLAS_TRANSPOSE::CblasNoTrans, C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C. + trans = CBLAS_TRANSPOSE::CblasConjTrans,C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C. + + \param[in] n + n is INTEGER + On entry, n specifies the order of the matrix C. n must be + at least zero. + + \param[in] k + k is INTEGER + If trans = CblasNoTrans: k is number of columns of the matrices A & B. + Otherwise: k is number of rows of the matrices A & B. + k must be at least zero. + + \param[in] alpha + alpha is COMPLEX/COMPLEX*16 + On entry, alpha specifies the scalar alpha. + + \param[in] A + A is COMPLEX/COMPLEX*16 array,dimension : + If trans = CblasNoTrans: + n-by-k , stored in an lda-by-k array [RowMajor: n-by-lda]. + Otherwise: + k-by-n , stored in an lda-by-n array [RowMajor: k-by-lda]. + + \param[in] lda + lda is INTEGER + On entry, lda specifies the Leading dimension of A + If trans = CblasNoTrans: lda >= max(1, n) [RowMajor: lda >= max(1, k)]. + Otherwise: lda >= max(1, k) [RowMajor: lda >= max(1, n)]. + + \param[in] B + B is COMPLEX/COMPLEX*16 array,dimension : + If trans = CblasNoTrans: + n-by-k , stored in an ldb-by-k array [RowMajor: n-by-ldb]. + Otherwise: + k-by-n , stored in an ldb-by-n array [RowMajor: k-by-ldb] + + \param[in] ldb + ldb is INTEGER + On entry, ldb specifies the Leading dimension of B + If trans = CblasNoTrans: ldb >= max(1, n) [RowMajor: ldb >= max(1, k)]. + Otherwise: ldb >= max(1, k) [RowMajor: ldb >= max(1, n)]. + + \param[in] beta + beta is REAL/DOUBLE PRECISION + On entry, beta specifies the scalar alpha.When beta is + supplied as zero then C need not be set on input. + + \param[in,out] C + C is COMPLEX/COMPLEX*16 array, dimension : + The n-by-n Hermitian matrix C, + stored in an ldc-by-n array [RowMajor: n-by-ldc]. + On exit, the array C is overwritten by the lower/upper + triangular part of the updated matrix. + + \param[in] ldc + ldc is INTEGER + On entry, ldc specifies the first dimension of C + ldc >= max(1, n) + */ +template< typename T > +void her2k( + CBLAS_ORDER layout, + CBLAS_UPLO uplo, + CBLAS_TRANSPOSE trans, + int64_t n, int64_t k, + T alpha, + T const *A, int64_t lda, + T const *B, int64_t ldb, + real_type beta, + T *C, int64_t ldc ) +{ + cblas_her2k( layout, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc ); +} } // namespace blis - #endif // #ifndef BLIS_HH diff --git a/test/Makefile b/test/Makefile index 5c4aca248..a2e0391d5 100644 --- a/test/Makefile +++ b/test/Makefile @@ -148,10 +148,10 @@ CBLAS_HEADER_PATH = ../frame/compat/cblas/src CINCFLAGS := -I$(INC_PATH) -I$(CBLAS_HEADER_PATH) # Use the CFLAGS for the configuration family. -override CFLAGS += $(call get-user-cflags-for,$(CONFIG_NAME)) +CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add local header paths to CFLAGS -override CFLAGS += -I$(TEST_SRC_PATH) +CFLAGS += -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. #LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) diff --git a/test/test_gemm.c b/test/test_gemm.c index c2730e68d..a0908965a 100644 --- a/test/test_gemm.c +++ b/test/test_gemm.c @@ -91,22 +91,13 @@ int main( int argc, char** argv ) n_input = 4; #endif -#if 0 +#if 1 // dt = BLIS_FLOAT; -// dt = BLIS_DOUBLE; + dt = BLIS_DOUBLE; #else // dt = BLIS_SCOMPLEX; // dt = BLIS_DCOMPLEX; #endif -#ifdef FLOAT - dt = BLIS_FLOAT; -#elif defined DOUBLE - dt = BLIS_DOUBLE; -#elif defined SCOMPLEX - dt = BLIS_SCOMPLEX; -#elif defined DCOMPLEX - dt = BLIS_DCOMPLEX; -#endif transa = BLIS_NO_TRANSPOSE; diff --git a/testcpp/Makefile b/testcpp/Makefile index 3f2dc1327..62d0de335 100644 --- a/testcpp/Makefile +++ b/testcpp/Makefile @@ -1,3 +1,5 @@ +# +# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. @@ -115,11 +117,7 @@ CINCFLAGS := -I$(INC_PATH) CXX = g++ # Use the CFLAGS for the configuration family. -<<<<<<< HEAD -override CFLAGS += $(call get-sandbox-cxxflags-for,$(CONFIG_NAME)) -======= override CFLAGS += $(call get-user-cflags-for,$(CONFIG_NAME)) ->>>>>>> Code Cleanup done; Test code updated to add performance measurement # Add local header paths to CFLAGS #CFLAGS = -O0 -g -Wall @@ -140,150 +138,15 @@ LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # Complete list of possible targets when defining 'all': # -<<<<<<< HEAD # blis # +#all: blis all: blis - -blis: test_asum_blis.x \ - test_axpy_blis.x \ - test_copy_blis.x \ - test_dot_blis.x \ - test_dotc_blis.x \ - test_gbmv_blis.x \ - test_gemm_blis.x \ - test_gemv_blis.x \ - test_ger_blis.x \ - test_gerc_blis.x \ - test_geru_blis.x \ - test_hemm_blis.x \ - test_hemv_blis.x \ - test_her2_blis.x \ - test_her_blis.x \ - test_herk_blis.x \ - test_hpr2_blis.x \ - test_hpr_blis.x \ - test_nrm2_blis.x \ - test_rot_blis.x \ - test_rotg_blis.x \ - test_rotm_blis.x \ - test_rotmg_blis.x \ - test_scal_blis.x \ - test_sdsdot_blis.x \ - test_spr2_blis.x \ - test_spr_blis.x \ - test_swap_blis.x \ - test_symm_blis.x \ - test_syr2_blis.x \ - test_syr2k_blis.x \ - test_syr_blis.x \ - test_syrk_blis.x \ - test_tbmv_blis.x \ - test_tbsv_blis.x \ - test_tpmv_blis.x \ - test_tpsv_blis.x \ - test_trmm_blis.x \ - test_trsm_blis.x \ - test_trsv_blis.x - -======= -# blis openblas atlas mkl mac essl -# -#all: blis openblas atlas mkl -all: blis openblas mkl - -blis: test_gemm_blis.x \ - test_gemm1_blis.x - -openblas: \ - test_dotv_openblas.x \ - test_axpyv_openblas.x \ - test_gemv_openblas.x \ - test_ger_openblas.x \ - test_hemv_openblas.x \ - test_her_openblas.x \ - test_her2_openblas.x \ - test_trmv_openblas.x \ - test_trsv_openblas.x \ - \ - test_gemm_openblas.x \ - test_hemm_openblas.x \ - test_herk_openblas.x \ - test_her2k_openblas.x \ - test_trmm_openblas.x \ - test_trsm_openblas.x - -atlas: \ - test_dotv_atlas.x \ - test_axpyv_atlas.x \ - test_gemv_atlas.x \ - test_ger_atlas.x \ - test_hemv_atlas.x \ - test_her_atlas.x \ - test_her2_atlas.x \ - test_trmv_atlas.x \ - test_trsv_atlas.x \ - \ - test_gemm_atlas.x \ - test_hemm_atlas.x \ - test_herk_atlas.x \ - test_her2k_atlas.x \ - test_trmm_atlas.x \ - test_trsm_atlas.x - -mkl: test_dotv_mkl.x \ - test_axpyv_mkl.x \ - test_gemv_mkl.x \ - test_ger_mkl.x \ - test_hemv_mkl.x \ - test_her_mkl.x \ - test_her2_mkl.x \ - test_trmv_mkl.x \ - test_trsv_mkl.x \ - \ - test_gemm_mkl.x \ - test_hemm_mkl.x \ - test_herk_mkl.x \ - test_her2k_mkl.x \ - test_trmm_mkl.x \ - test_trsm_mkl.x - -essl: test_dotv_essl.x \ - test_axpyv_essl.x \ - test_gemv_essl.x \ - test_ger_essl.x \ - test_hemv_essl.x \ - test_her_essl.x \ - test_her2_essl.x \ - test_trmv_essl.x \ - test_trsv_essl.x \ - \ - test_gemm_essl.x \ - test_hemm_essl.x \ - test_herk_essl.x \ - test_her2k_essl.x \ - test_trmm_essl.x \ - test_trsm_essl.x - -mac: test_dotv_mac.x \ - test_axpyv_mac.x \ - test_gemv_mac.x \ - test_ger_mac.x \ - test_hemv_mac.x \ - test_her_mac.x \ - test_her2_mac.x \ - test_trmv_mac.x \ - test_trsv_mac.x \ - \ - test_gemm_mac.x \ - test_hemm_mac.x \ - test_herk_mac.x \ - test_her2k_mac.x \ - test_trmm_mac.x \ - test_trsm_mac.x - ->>>>>>> Code Cleanup done; Test code updated to add performance measurement +blis: test_trsm_blis.x \ + test_symm_blis.x \ + test_hemm_blis.x \ + test_gemm_blis.x # --Object file rules -- @@ -292,18 +155,18 @@ $(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.cc $(CXX) $(CFLAGS) -c $< -o $@ test_%_blis.o: test_%.cc -<<<<<<< HEAD - @$(CXX) $(CFLAGS) -c $< -o $@ -======= $(CXX) $(CFLAGS) -c $< -o $@ ->>>>>>> Code Cleanup done; Test code updated to add performance measurement # -- Executable file rules -- +# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS +# on the link command line in case BLIS was configured with the BLAS +# compatibility layer. This prevents BLIS from inadvertently getting called +# for the BLAS routines we are trying to test with. + test_%_blis.x: test_%_blis.o $(LIBBLIS_LINK) - @$(LINKER) $^ $(LIBBLIS_LINK) $(LDFLAGS) -o $@ - ./$@ + $(LINKER) $^ $(LIBBLIS_LINK) $(LDFLAGS) -o $@ # -- Clean rules -- diff --git a/testcpp/test_gemm1.cc b/testcpp/test_gemm1.cc deleted file mode 100644 index c428ebbe7..000000000 --- a/testcpp/test_gemm1.cc +++ /dev/null @@ -1,421 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include - -#include -#include -#include -#include -#include "blis.hh" -using namespace std; - - -//#define FILE_IN_OUT // File based input matrix dimensions - - -#define PRINT - -int main( int argc, char** argv ) -{ - obj_t a, b, c; - obj_t c_save; - obj_t alpha, beta; - dim_t m, n, k; - dim_t p; - dim_t p_begin, p_end, p_inc; - int m_input, n_input, k_input; - num_t dt; - int r, n_repeats; - trans_t transa; - trans_t transb; -// f77_char f77_transa; -// f77_char f77_transb; - - double dtime; - double dtime_save; - double gflops; - - -#ifdef FILE_IN_OUT - FILE* fin = NULL; - FILE* fout = NULL; - char gemm = 's'; - -#endif - - //bli_init(); - - //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); - - n_repeats = 100; - -#ifndef PRINT - p_begin = 200; - p_end = 2000; - p_inc = 200; - - m_input = -1; - n_input = -1; - k_input = -1; -#else - p_begin = 16; - p_end = 16; - p_inc = 1; - - m_input = 5; - k_input = 6; - n_input = 4; -#endif - -#ifdef FLOAT - dt = BLIS_FLOAT; -#elif defined DOUBLE - dt = BLIS_DOUBLE; -#elif defined SCOMPLEX - dt = BLIS_SCOMPLEX; -#elif defined DCOMPLEX - dt = BLIS_DCOMPLEX; -#endif - - transa = BLIS_NO_TRANSPOSE; - transb = BLIS_NO_TRANSPOSE; - -// bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); -// bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); - - -#ifdef FILE_IN_OUT - if (argc < 3) - { - printf("Usage: ./test_gemm_XX.x input.csv output.csv\n"); - exit(1); - } - fin = fopen(argv[1], "r"); - if (fin == NULL) - { - printf("Error opening the file %s\n", argv[1]); - exit(1); - } - fout = fopen(argv[2], "w"); - if (fout == NULL) - { - printf("Error opening output file %s\n", argv[2]); - exit(1); - } - fprintf(fout, "m\t k\t n\t cs_a\t cs_b\t cs_c\t gflops\t GEMM_Algo\n"); - - - printf("~~~~~~~~~~_BLAS\t m\t k\t n\t cs_a\t cs_b\t cs_c \t gflops\t GEMM_Algo\n"); - - inc_t cs_a; - inc_t cs_b; - inc_t cs_c; - - while (fscanf(fin, "%ld %ld %ld %ld %ld %ld\n", &m, &k, &n, &cs_a, &cs_b, &cs_c) == 6) - { - if ((m > cs_a) || (k > cs_b) || (m > cs_c)) continue; // leading dimension should be greater than number of rows - - bli_obj_create( dt, 1, 1, 0, 0, &alpha); - bli_obj_create( dt, 1, 1, 0, 0, &beta ); - - bli_obj_create( dt, m, k, 1, cs_a, &a ); - bli_obj_create( dt, k, n, 1, cs_b, &b ); - bli_obj_create( dt, m, n, 1, cs_c, &c ); - bli_obj_create( dt, m, n, 1, cs_c, &c_save ); - - bli_obj_set_conjtrans( transa, &a); - bli_obj_set_conjtrans( transb, &b); - - //bli_setsc( 0.0, -1, &alpha ); - //bli_setsc( 0.0, 1, &beta ); - - bli_setsc( -1, 0.0, &alpha ); - bli_setsc( 1, 0.0, &beta ); - - // printf("%1.1f %1.1f\n", *((double *)bli_obj_buffer_for_const(BLIS_FLOAT, &alpha)), *((double *)bli_obj_buffer_for_const(BLIS_FLOAT, &beta))); - -#else - for ( p = p_begin; p <= p_end; p += p_inc ) - { - if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); - else m = ( dim_t ) m_input; - if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); - else n = ( dim_t ) n_input; - if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); - else k = ( dim_t ) k_input; - - bli_obj_create( dt, 1, 1, 0, 0, &alpha ); - bli_obj_create( dt, 1, 1, 0, 0, &beta ); - - bli_obj_create( dt, m, k, 0, 0, &a ); - bli_obj_create( dt, k, n, 0, 0, &b ); - bli_obj_create( dt, m, n, 0, 0, &c ); - bli_obj_create( dt, m, n, 0, 0, &c_save ); - - bli_randm( &a ); - bli_randm( &b ); - bli_randm( &c ); - - bli_obj_set_conjtrans( transa, &a ); - bli_obj_set_conjtrans( transb, &b ); - - bli_setsc( (0.9/1.0), 0.2, &alpha ); - bli_setsc( -(1.1/1.0), 0.3, &beta ); -#endif - - bli_copym( &c, &c_save ); - - dtime_save = DBL_MAX; - - for ( r = 0; r < n_repeats; ++r ) - { - bli_copym( &c_save, &c ); - - - dtime = bli_clock(); - - -#ifdef PRINT - bli_printm( "a", &a, "%4.1f", "" ); - bli_printm( "b", &b, "%4.1f", "" ); - bli_printm( "c", &c, "%4.1f", "" ); -#endif - -#if 0 - - bli_gemm( &alpha, - &a, - &b, - &beta, - &c ); - -#else - - if ( bli_is_float( dt ) ) - { - int M = bli_obj_length( &c ); - int K = bli_obj_width_after_trans( &a ); - int N = bli_obj_width( &c ); - int lda = bli_obj_col_stride( &a ); - int ldb = bli_obj_col_stride( &b ); - int ldc = bli_obj_col_stride( &c ); - float* alphap = (float *)bli_obj_buffer( &alpha ); - float* ap = (float *)bli_obj_buffer( &a ); - float* bp = (float *)bli_obj_buffer( &b ); - float* betap = (float *)bli_obj_buffer( &beta ); - float* cp = (float *)bli_obj_buffer( &c ); - - blis::gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, *alphap, ap, - lda, bp, ldb, *betap, cp, ldc); - -#if 0 - sgemm_( &f77_transa, - &f77_transb, - &mm, - &nn, - &kk, - alphap, - ap, &lda, - bp, &ldb, - betap, - cp, &ldc ); -#endif - } - else if ( bli_is_double( dt ) ) - { - int M = bli_obj_length( &c ); - int K = bli_obj_width_after_trans( &a ); - int N = bli_obj_width( &c ); - int lda = bli_obj_col_stride( &a ); - int ldb = bli_obj_col_stride( &b ); - int ldc = bli_obj_col_stride( &c ); - double* alphap = (double*)bli_obj_buffer( &alpha ); - double* ap = (double*)bli_obj_buffer( &a ); - double* bp = (double*)bli_obj_buffer( &b ); - double* betap = (double*)bli_obj_buffer( &beta ); - double* cp = (double*)bli_obj_buffer( &c ); - - blis::gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, *alphap, ap, - lda, bp, ldb, *betap, cp, ldc); -#if 0 - dgemm_( &f77_transa, - &f77_transb, - &mm, - &nn, - &kk, - alphap, - ap, &lda, - bp, &ldb, - betap, - cp, &ldc ); -#endif - } - else if ( bli_is_scomplex( dt ) ) - { - int M = bli_obj_length( &c ); - int K = bli_obj_width_after_trans( &a ); - int N = bli_obj_width( &c ); - int lda = bli_obj_col_stride( &a ); - int ldb = bli_obj_col_stride( &b ); - int ldc = bli_obj_col_stride( &c ); - std::complex* alphap = (std::complex*)bli_obj_buffer( &alpha ); - std::complex* ap = (std::complex*)bli_obj_buffer( &a ); - std::complex* bp = (std::complex*)bli_obj_buffer( &b ); - std::complex* betap = (std::complex*)bli_obj_buffer( &beta ); - std::complex* cp = (std::complex*)bli_obj_buffer( &c ); - - blis::gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, *alphap, ap, - lda, bp, ldb, *betap, cp, ldc); -#if 0 - cgemm_( &f77_transa, - &f77_transb, - &mm, - &nn, - &kk, - alphap, - ap, &lda, - bp, &ldb, - betap, - cp, &ldc ); -#endif - } - else if ( bli_is_dcomplex( dt ) ) - { - f77_int M = bli_obj_length( &c ); - f77_int K = bli_obj_width_after_trans( &a ); - f77_int N = bli_obj_width( &c ); - f77_int lda = bli_obj_col_stride( &a ); - f77_int ldb = bli_obj_col_stride( &b ); - f77_int ldc = bli_obj_col_stride( &c ); - std::complex* alphap = (std::complex*)bli_obj_buffer( &alpha ); - std::complex* ap = (std::complex*)bli_obj_buffer( &a ); - std::complex* bp = (std::complex*)bli_obj_buffer( &b ); - std::complex* betap = (std::complex*)bli_obj_buffer( &beta ); - std::complex* cp = (std::complex*)bli_obj_buffer( &c ); - - blis::gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, *alphap, ap, - lda, bp, ldb, *betap, cp, ldc); -#if 0 - zgemm_( &f77_transa, - &f77_transb, - &mm, - &nn, - &kk, - alphap, - ap, &lda, - bp, &ldb, - betap, - cp, &ldc ); -#endif - } -#endif - -#ifdef PRINT - bli_printm( "c after", &c, "%4.1f", "" ); - exit(1); -#endif - - - dtime_save = bli_clock_min_diff( dtime_save, dtime ); - } - - gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); - - if ( bli_is_complex( dt ) ) gflops *= 4.0; - -#ifdef BLIS - printf( "data_gemm_blis" ); -#else - //printf( "data_gemm_%s", BLAS ); -#endif - - -#ifdef FILE_IN_OUT - - if ( bli_is_double( dt ) ) { - - if (((m * n) < (BLIS_SMALL_MATRIX_THRES * BLIS_SMALL_MATRIX_THRES/4)) || ((m < (BLIS_SMALL_M_RECT_MATRIX_THRES/2) ) && (k < (BLIS_SMALL_K_RECT_MATRIX_THRES/2) ))) - gemm = 'S'; // small gemm - else gemm = 'N'; // Normal blis gemm - - } - else if (bli_is_float( dt )) { - if (((m * n) < (BLIS_SMALL_MATRIX_THRES * BLIS_SMALL_MATRIX_THRES)) || ((m < BLIS_SMALL_M_RECT_MATRIX_THRES) && (k < BLIS_SMALL_K_RECT_MATRIX_THRES))) - gemm = 'S'; // small gemm - else gemm = 'N'; // normal blis gemm - } - - - - printf("%6lu \t %4lu \t %4lu \t %4lu \t %4lu \t %4lu \t %6.3f \t %c\n", \ - ( unsigned long )m, - ( unsigned long )k, - ( unsigned long )n, (unsigned long)cs_a, (unsigned long)cs_b, (unsigned long)cs_c, gflops, gemm ); - - - fprintf(fout, "%6lu \t %4lu \t %4lu \t %4lu \t %4lu \t %4lu \t %6.3f \t %c\n", \ - ( unsigned long )m, - ( unsigned long )k, - ( unsigned long )n, (unsigned long)cs_a, (unsigned long)cs_b, (unsigned long)cs_c, gflops, gemm ); - fflush(fout); - -#else - printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", - ( unsigned long )(p - p_begin + 1)/p_inc + 1, - ( unsigned long )m, - ( unsigned long )k, - ( unsigned long )n, gflops ); - - -#endif - bli_obj_free( &alpha ); - bli_obj_free( &beta ); - - bli_obj_free( &a ); - bli_obj_free( &b ); - bli_obj_free( &c ); - bli_obj_free( &c_save ); - } - - //bli_finalize(); - -#ifdef FILE_IN_OUT - fclose(fin); - fclose(fout); -#endif - - return 0; -} -