From d925ebeb06fefae9d77f1fbcd9e0029d601aaa29 Mon Sep 17 00:00:00 2001 From: Chandrashekara K R Date: Tue, 23 Aug 2022 14:58:26 +0530 Subject: [PATCH] CBLAS/BLAS interface decoupling for level 3 APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ->In BLIS the cblas interface is implemented as a wrapper around the blas interface. For example the CBLAS api ‘cblas_dgemm’ internally invokes BLAS API ‘dgemm_’. ->If the end user wants to use different libraries for CBLAS and BLAS, the current implementation of BLIS doesn’t allow it and may result in recursion. ->This change separates the CBLAS and BLAS implementation by adding an additional level of abstraction. The implementation of the API is moved to the new function which is invoked directly from the CBLAS and BLAS wrappers. AMD-Internal: [SWLCSG-1477] Change-Id: I6218a3e81060fc8045f4de0ace87f708465dfae5 --- frame/compat/bla_gemm.c | 41 +++++- frame/compat/bla_gemm.h | 13 ++ frame/compat/bla_gemm3m.c | 40 +++++- frame/compat/bla_gemm3m.h | 15 +- frame/compat/bla_gemm_amd.c | 82 ++++++++++- frame/compat/bla_gemmt.c | 40 +++++- frame/compat/bla_gemmt.h | 15 +- frame/compat/bla_hemm.c | 38 ++++- frame/compat/bla_hemm.h | 13 ++ frame/compat/bla_her2k.c | 40 +++++- frame/compat/bla_her2k.h | 13 ++ frame/compat/bla_herk.c | 38 ++++- frame/compat/bla_herk.h | 12 ++ frame/compat/bla_symm.c | 38 ++++- frame/compat/bla_symm.h | 13 ++ frame/compat/bla_syr2k.c | 38 ++++- frame/compat/bla_syr2k.h | 13 ++ frame/compat/bla_syrk.c | 36 ++++- frame/compat/bla_syrk.h | 12 ++ frame/compat/bla_trmm.c | 38 ++++- frame/compat/bla_trmm.h | 13 ++ frame/compat/bla_trsm.c | 38 ++++- frame/compat/bla_trsm.h | 13 ++ frame/compat/bla_trsm_amd.c | 108 ++++++++++++-- frame/compat/cblas/src/cblas_f77.h | 74 +++++----- frame/include/bli_macro_defs.h | 4 +- frame/util/bli_util_api_wrap.c | 218 ++++++++++++++--------------- 27 files changed, 846 insertions(+), 210 deletions(-) diff --git a/frame/compat/bla_gemm.c 
b/frame/compat/bla_gemm.c index d3601952b..ae14196e8 100644 --- a/frame/compat/bla_gemm.c +++ b/frame/compat/bla_gemm.c @@ -44,7 +44,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -136,14 +136,30 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -318,7 +334,24 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ + #endif #ifdef BLIS_ENABLE_BLAS diff --git a/frame/compat/bla_gemm.h b/frame/compat/bla_gemm.h index c9ea83149..d8fe6ddb9 100644 --- a/frame/compat/bla_gemm.h +++ b/frame/compat/bla_gemm.h @@ -41,6 +41,19 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ diff --git a/frame/compat/bla_gemm3m.c b/frame/compat/bla_gemm3m.c index 665c8643d..4ecbba555 100644 --- a/frame/compat/bla_gemm3m.c +++ b/frame/compat/bla_gemm3m.c @@ -44,7 +44,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -131,14 +131,30 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -240,7 +256,23 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_gemm3m.h b/frame/compat/bla_gemm3m.h index 1063d85c0..d64e3f199 100644 --- a/frame/compat/bla_gemm3m.h +++ b/frame/compat/bla_gemm3m.h @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2020-2022, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +40,19 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ diff --git a/frame/compat/bla_gemm_amd.c b/frame/compat/bla_gemm_amd.c index 2a9dcb99d..6bc0dcd55 100644 --- a/frame/compat/bla_gemm_amd.c +++ b/frame/compat/bla_gemm_amd.c @@ -44,7 +44,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -136,14 +136,32 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ +\ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -318,11 +336,30 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ -} +} \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ +\ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ + #endif #ifdef BLIS_ENABLE_BLAS -void dgemm_ +void dgemm_blis_impl ( const f77_char* transa, const f77_char* transb, @@ -658,7 +695,24 @@ void dgemm_ bli_finalize_auto(); } // end of dgemm_ -void zgemm_ +void dgemm_ +( + const f77_char* transa, + const f77_char* transb, + const f77_int* m, + const f77_int* n, + const f77_int* k, + const double* alpha, + const double* a, const f77_int* lda, + const double* b, const f77_int* ldb, + const double* beta, + double* c, const f77_int* ldc +) +{ + dgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void zgemm_blis_impl ( const f77_char* transa, const f77_char* transb, @@ -915,6 +969,22 @@ void zgemm_ bli_finalize_auto(); }// end of zgemm_ +void zgemm_ + ( + const f77_char* transa, + const f77_char* transb, + const f77_int* m, + const f77_int* n, + const f77_int* k, + const dcomplex* alpha, + const dcomplex* a, const f77_int* lda, + const dcomplex* b, const f77_int* ldb, + const dcomplex* beta, + dcomplex* c, const f77_int* ldc + ) +{ + zgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} INSERT_GENTFUNC_BLAS_SC( gemm, gemm ) diff --git a/frame/compat/bla_gemmt.c b/frame/compat/bla_gemmt.c index 7abad40ac..f8f6fa2de 100644 --- a/frame/compat/bla_gemmt.c +++ b/frame/compat/bla_gemmt.c @@ -44,7 +44,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -134,14 +134,30 
@@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -247,7 +263,23 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_gemmt.h b/frame/compat/bla_gemmt.h index 8043d6829..d4efb995c 100644 --- a/frame/compat/bla_gemmt.h +++ b/frame/compat/bla_gemmt.h @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020-2022, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +40,19 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_hemm.c b/frame/compat/bla_hemm.c index 0e003012d..ed3cbb517 100644 --- a/frame/compat/bla_hemm.c +++ b/frame/compat/bla_hemm.c @@ -45,7 +45,7 @@ #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -132,14 +132,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -248,7 +263,22 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #endif diff --git a/frame/compat/bla_hemm.h b/frame/compat/bla_hemm.h index 711877ede..7054be7c9 100644 --- a/frame/compat/bla_hemm.h +++ b/frame/compat/bla_hemm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ diff --git a/frame/compat/bla_her2k.c b/frame/compat/bla_her2k.c index e21a2cda4..cba6432eb 100755 --- a/frame/compat/bla_her2k.c +++ b/frame/compat/bla_her2k.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -45,7 +45,7 @@ #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -137,14 +137,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -258,7 +273,22 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #endif diff --git a/frame/compat/bla_her2k.h b/frame/compat/bla_her2k.h index c771f78d4..a3fa41302 100644 --- a/frame/compat/bla_her2k.h +++ b/frame/compat/bla_her2k.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_herk.c b/frame/compat/bla_herk.c index 36188e6a6..b07ee180c 100755 --- a/frame/compat/bla_herk.c +++ b/frame/compat/bla_herk.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -45,7 +45,7 @@ #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -131,14 +131,28 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ + } \ #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -242,7 +256,21 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_herk.h b/frame/compat/bla_herk.h index e649a74ab..8ec9183e8 100644 --- a/frame/compat/bla_herk.h +++ b/frame/compat/bla_herk.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,17 @@ #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_symm.c b/frame/compat/bla_symm.c index 85aebb435..7b915a5ed 100755 --- a/frame/compat/bla_symm.c +++ b/frame/compat/bla_symm.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -131,14 +131,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -246,7 +261,22 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_symm.h b/frame/compat/bla_symm.h index b186e4b43..f10e1cbb8 100644 --- a/frame/compat/bla_symm.h +++ b/frame/compat/bla_symm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ diff --git a/frame/compat/bla_syr2k.c b/frame/compat/bla_syr2k.c index 6a4f31b96..751e008ae 100644 --- a/frame/compat/bla_syr2k.c +++ b/frame/compat/bla_syr2k.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -139,14 +139,29 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. 
*/ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -262,7 +277,22 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_syr2k.h b/frame/compat/bla_syr2k.h index 91d9a3acf..fc127d9ea 100644 --- a/frame/compat/bla_syr2k.h +++ b/frame/compat/bla_syr2k.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_syrk.c b/frame/compat/bla_syrk.c index 376b23aec..b2ec611f5 100644 --- a/frame/compat/bla_syrk.c +++ b/frame/compat/bla_syrk.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -133,14 +133,28 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -245,7 +259,21 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. 
*/ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_syrk.h b/frame/compat/bla_syrk.h index b6ca938a6..c87dc6694 100644 --- a/frame/compat/bla_syrk.h +++ b/frame/compat/bla_syrk.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,17 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_trmm.c b/frame/compat/bla_trmm.c index c319b3ab5..59c64b90e 100644 --- a/frame/compat/bla_trmm.c +++ b/frame/compat/bla_trmm.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -131,14 +131,29 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. 
*/ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -239,7 +254,22 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #endif diff --git a/frame/compat/bla_trmm.h b/frame/compat/bla_trmm.h index 4f0c20b1b..10cbb6cbc 100644 --- a/frame/compat/bla_trmm.h +++ b/frame/compat/bla_trmm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ diff --git a/frame/compat/bla_trsm.c b/frame/compat/bla_trsm.c index e99805d8d..f709a8cd0 100644 --- a/frame/compat/bla_trsm.c +++ b/frame/compat/bla_trsm.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -130,14 +130,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -393,7 +408,22 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #endif diff --git a/frame/compat/bla_trsm.h b/frame/compat/bla_trsm.h index 5694db52a..af1b626df 100644 --- a/frame/compat/bla_trsm.h +++ b/frame/compat/bla_trsm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ diff --git a/frame/compat/bla_trsm_amd.c b/frame/compat/bla_trsm_amd.c index 8ca7434bd..4479725fb 100644 --- a/frame/compat/bla_trsm_amd.c +++ b/frame/compat/bla_trsm_amd.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -130,14 +130,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ + } \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -393,13 +408,28 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #endif #ifdef BLIS_ENABLE_BLAS -void strsm_ +void strsm_blis_impl ( const f77_char* side, const f77_char* uploa, @@ -669,8 +699,23 @@ void strsm_ /* Finalize BLIS. */ bli_finalize_auto(); } +void strsm_ +( + const f77_char* side, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const float* alpha, + const float* a, const f77_int* lda, + float* b, const f77_int* ldb +) +{ + strsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); +} -void dtrsm_ +void dtrsm_blis_impl ( const f77_char* side, const f77_char* uploa, @@ -892,7 +937,7 @@ void dtrsm_ bli_obj_set_conjtrans( blis_transa, &ao ); bli_obj_set_struc( struca, &ao ); - + #ifdef BLIS_ENABLE_SMALL_MATRIX_TRSM // This function is invoked on all architectures including ‘generic’. 
// Non-AVX platforms will use the kernels derived from the context. @@ -973,9 +1018,24 @@ void dtrsm_ /* Finalize BLIS. */ bli_finalize_auto(); } +void dtrsm_ +( + const f77_char* side, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const double* alpha, + const double* a, const f77_int* lda, + double* b, const f77_int* ldb +) +{ + dtrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); +} -void ztrsm_ +void ztrsm_blis_impl ( const f77_char* side, const f77_char* uploa, @@ -1331,9 +1391,24 @@ void ztrsm_ /* Finalize BLIS. */ bli_finalize_auto(); } +void ztrsm_ +( + const f77_char* side, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const dcomplex* alpha, + const dcomplex* a, const f77_int* lda, + dcomplex* b, const f77_int* ldb +) +{ + ztrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); +} -void ctrsm_ +void ctrsm_blis_impl ( const f77_char* side, const f77_char* uploa, @@ -1664,5 +1739,20 @@ void ctrsm_ /* Finalize BLIS. */ bli_finalize_auto(); } +void ctrsm_ +( + const f77_char* side, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const scomplex* alpha, + const scomplex* a, const f77_int* lda, + scomplex* b, const f77_int* ldb +) +{ + ctrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); +} #endif diff --git a/frame/compat/cblas/src/cblas_f77.h b/frame/compat/cblas/src/cblas_f77.h index fabf3efb1..5ec518de9 100644 --- a/frame/compat/cblas/src/cblas_f77.h +++ b/frame/compat/cblas/src/cblas_f77.h @@ -7,7 +7,7 @@ * * (Heavily hacked down from the original) * - * Copyright (C) 2020 - 2021, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2020 - 2022, Advanced Micro Devices, Inc. All rights reserved. 
* */ @@ -326,40 +326,40 @@ /* * Level 3 BLAS */ -#define F77_chemm chemm_ -#define F77_cherk cherk_ -#define F77_cher2k cher2k_ -#define F77_zhemm zhemm_ -#define F77_zherk zherk_ -#define F77_zher2k zher2k_ -#define F77_sgemm sgemm_ -#define F77_ssymm ssymm_ -#define F77_ssyrk ssyrk_ -#define F77_ssyr2k ssyr2k_ -#define F77_strmm strmm_ -#define F77_strsm strsm_ -#define F77_dgemm dgemm_ -#define F77_dsymm dsymm_ -#define F77_dsyrk dsyrk_ -#define F77_dsyr2k dsyr2k_ -#define F77_dtrmm dtrmm_ -#define F77_dtrsm dtrsm_ -#define F77_cgemm cgemm_ -#define F77_csymm csymm_ -#define F77_csyrk csyrk_ -#define F77_csyr2k csyr2k_ -#define F77_ctrmm ctrmm_ -#define F77_ctrsm ctrsm_ -#define F77_zgemm zgemm_ -#define F77_zsymm zsymm_ -#define F77_zsyrk zsyrk_ -#define F77_zsyr2k zsyr2k_ -#define F77_ztrmm ztrmm_ -#define F77_ztrsm ztrsm_ -#define F77_dgemmt dgemmt_ -#define F77_sgemmt sgemmt_ -#define F77_cgemmt cgemmt_ -#define F77_zgemmt zgemmt_ +#define F77_chemm chemm_blis_impl +#define F77_cherk cherk_blis_impl +#define F77_cher2k cher2k_blis_impl +#define F77_zhemm zhemm_blis_impl +#define F77_zherk zherk_blis_impl +#define F77_zher2k zher2k_blis_impl +#define F77_sgemm sgemm_blis_impl +#define F77_ssymm ssymm_blis_impl +#define F77_ssyrk ssyrk_blis_impl +#define F77_ssyr2k ssyr2k_blis_impl +#define F77_strmm strmm_blis_impl +#define F77_strsm strsm_blis_impl +#define F77_dgemm dgemm_blis_impl +#define F77_dsymm dsymm_blis_impl +#define F77_dsyrk dsyrk_blis_impl +#define F77_dsyr2k dsyr2k_blis_impl +#define F77_dtrmm dtrmm_blis_impl +#define F77_dtrsm dtrsm_blis_impl +#define F77_cgemm cgemm_blis_impl +#define F77_csymm csymm_blis_impl +#define F77_csyrk csyrk_blis_impl +#define F77_csyr2k csyr2k_blis_impl +#define F77_ctrmm ctrmm_blis_impl +#define F77_ctrsm ctrsm_blis_impl +#define F77_zgemm zgemm_blis_impl +#define F77_zsymm zsymm_blis_impl +#define F77_zsyrk zsyrk_blis_impl +#define F77_zsyr2k zsyr2k_blis_impl +#define F77_ztrmm ztrmm_blis_impl +#define F77_ztrsm 
ztrsm_blis_impl +#define F77_dgemmt dgemmt_blis_impl +#define F77_sgemmt sgemmt_blis_impl +#define F77_cgemmt cgemmt_blis_impl +#define F77_zgemmt zgemmt_blis_impl /* * Aux Function @@ -375,8 +375,8 @@ #define F77_daxpby daxpby_ #define F77_caxpby caxpby_ #define F77_zaxpby zaxpby_ -#define F77_cgemm3m cgemm3m_ -#define F77_zgemm3m zgemm3m_ +#define F77_cgemm3m cgemm3m_blis_impl +#define F77_zgemm3m zgemm3m_blis_impl #define F77_isamin_sub isaminsub_ #define F77_idamin_sub idaminsub_ diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index f29fdc1fe..75b9c9fdc 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018-2021, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -162,11 +162,13 @@ #define PASTEF77(ch1,name) ch1 ## name #define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name #define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name +#define PASTEF77S(ch1,name) ch1 ## name ## _blis_impl #else #define PASTEF770(name) name ## _ #define PASTEF77(ch1,name) ch1 ## name ## _ #define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name ## _ #define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name ## _ +#define PASTEF77S(ch1,name) ch1 ## name ## _blis_impl #endif // -- Include other groups of macros diff --git a/frame/util/bli_util_api_wrap.c b/frame/util/bli_util_api_wrap.c index 81300761f..9e8d1ccc3 100644 --- a/frame/util/bli_util_api_wrap.c +++ b/frame/util/bli_util_api_wrap.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2021-2022, Advanced Micro Devices, Inc. 
All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -210,17 +210,17 @@ void CGBMV_(const char *trans,const f77_int *m,const f77_int *n,const f77_int void CGEMM(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - cgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void cgemm(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - cgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMM_(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - cgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMV(const char *trans,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *x,const f77_int *incx,const scomplex *beta,scomplex *y,const f77_int *incy) @@ -285,17 +285,17 @@ void CHBMV_(const char *uplo,const f77_int *n,const f77_int *k,const scomplex void CHEMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex 
*beta,scomplex *c,const f77_int *ldc) { - chemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + chemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void chemm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - chemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + chemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void CHEMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - chemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + chemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void CHEMV(const char *uplo,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *x,const f77_int *incx,const scomplex *beta,scomplex *y,const f77_int *incy) @@ -345,32 +345,32 @@ void CHER2_(const char *uplo,const f77_int *n,const scomplex *alpha,const sco void CHER2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const float *beta,scomplex *c,const f77_int *ldc) { - cher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void cher2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const float *beta,scomplex *c,const f77_int *ldc) { - cher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CHER2K_(const 
char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const float *beta,scomplex *c,const f77_int *ldc) { - cher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CHERK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const scomplex *a,const f77_int *lda,const float *beta,scomplex *c,const f77_int *ldc) { - cherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + cherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void cherk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const scomplex *a,const f77_int *lda,const float *beta,scomplex *c,const f77_int *ldc) { - cherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + cherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void CHERK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const scomplex *a,const f77_int *lda,const float *beta,scomplex *c,const f77_int *ldc) { - cherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + cherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void CHPMV(const char *uplo,const f77_int *n,const scomplex *alpha,const scomplex *ap,const scomplex *x,const f77_int *incx,const scomplex *beta,scomplex *y,const f77_int *incy) @@ -495,47 +495,47 @@ void CSWAP_(const f77_int *n,scomplex *cx,const f77_int *incx,scomplex *cy,con void CSYMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + csymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void csymm(const char *side,const 
char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + csymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void CSYMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + csymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void CSYR2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + csyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void csyr2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + csyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CSYR2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + csyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CSYRK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const 
scomplex *beta,scomplex *c,const f77_int *ldc) { - csyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + csyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void csyrk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + csyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void CSYRK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + csyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void CTBMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const f77_int *k,const scomplex *a,const f77_int *lda,scomplex *x,const f77_int *incx) @@ -600,17 +600,17 @@ void CTPSV_(const char *uplo,const char *trans,const char *diag,const f77_ void CTRMM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ctrmm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void CTRMM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { 
- ctrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void CTRMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const scomplex *a,const f77_int *lda,scomplex *x,const f77_int *incx) @@ -630,17 +630,17 @@ void CTRMV_(const char *uplo,const char *trans,const char *diag,const f77_ void CTRSM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ctrsm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void CTRSM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void CTRSV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const scomplex *a,const f77_int *lda,scomplex *x,const f77_int *incx) @@ -750,17 +750,17 @@ void DGBMV_(const char *trans,const f77_int *m,const f77_int *n,const f77_int void DGEMM(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, 
c, ldc); + dgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void dgemm(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DGEMM_(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DGEMV(const char *trans,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *x,const f77_int *incx,const double *beta,double *y,const f77_int *incy) @@ -975,17 +975,17 @@ void DSWAP_(const f77_int *n,double *dx,const f77_int *incx,double *dy,const f77 void DSYMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + dsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void dsymm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + dsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void DSYMM_(const char *side,const char *uplo,const f77_int *m,const f77_int 
*n,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + dsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void DSYMV(const char *uplo,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *x,const f77_int *incx,const double *beta,double *y,const f77_int *incy) @@ -1035,32 +1035,32 @@ void DSYR2_(const char *uplo,const f77_int *n,const double *alpha,const double void DSYR2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void dsyr2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DSYR2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DSYRK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *beta,double *c,const f77_int *ldc) { - dsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + dsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } 
void dsyrk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *beta,double *c,const f77_int *ldc) { - dsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + dsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void DSYRK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *beta,double *c,const f77_int *ldc) { - dsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + dsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void DTBMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const f77_int *k,const double *a,const f77_int *lda,double *x,const f77_int *incx) @@ -1125,17 +1125,17 @@ void DTPSV_(const char *uplo,const char *trans,const char *diag,const f77_ void DTRMM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void dtrmm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void DTRMM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void DTRMV(const char *uplo,const char *trans,const char *diag,const 
f77_int *n,const double *a,const f77_int *lda,double *x,const f77_int *incx) @@ -1155,17 +1155,17 @@ void DTRMV_(const char *uplo,const char *trans,const char *diag,const f77_ void DTRSM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void dtrsm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void DTRSM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void DTRSV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const double *a,const f77_int *lda,double *x,const f77_int *incx) @@ -1417,17 +1417,17 @@ void SGBMV_(const char *trans,const f77_int *m,const f77_int *n,const f77_int void SGEMM(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - sgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void sgemm(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const 
f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - sgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SGEMM_(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - sgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SGEMV(const char *trans,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *x,const f77_int *incx,const float *beta,float *y,const f77_int *incy) @@ -1629,17 +1629,17 @@ void SSWAP_(const f77_int *n,float *sx,const f77_int *incx,float *sy,const f77 void SSYMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + ssymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ssymm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + ssymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void SSYMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + ssymm_blis_impl( side, uplo, m, n, alpha, a, lda, 
b, ldb, beta, c, ldc); } void SSYMV(const char *uplo,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *x,const f77_int *incx,const float *beta,float *y,const f77_int *incy) @@ -1689,32 +1689,32 @@ void SSYR2_(const char *uplo,const f77_int *n,const float *alpha,const float void SSYR2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + ssyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ssyr2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + ssyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SSYR2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + ssyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SSYRK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *beta,float *c,const f77_int *ldc) { - ssyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + ssyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ssyrk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *beta,float *c,const f77_int *ldc) { - ssyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + ssyrk_blis_impl( uplo, trans, n, k, alpha, a, 
lda, beta, c, ldc); } void SSYRK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *beta,float *c,const f77_int *ldc) { - ssyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + ssyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void STBMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const f77_int *k,const float *a,const f77_int *lda,float *x,const f77_int *incx) @@ -1779,17 +1779,17 @@ void STPSV_(const char *uplo,const char *trans,const char *diag,const f77_ void STRMM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void strmm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void STRMM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void STRMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const float *a,const f77_int *lda,float *x,const f77_int *incx) @@ -1809,17 +1809,17 @@ void STRMV_(const char *uplo,const char *trans,const char *diag,const f77_ void STRSM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float 
*alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void strsm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void STRSM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void STRSV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const float *a,const f77_int *lda,float *x,const f77_int *incx) @@ -1929,17 +1929,17 @@ void ZGBMV_(const char *trans,const f77_int *m,const f77_int *n,const f77_int void ZGEMM(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zgemm(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMM_(const char *transa,const 
char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMV(const char *trans,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *x,const f77_int *incx,const dcomplex *beta,dcomplex *y,const f77_int *incy) @@ -2004,17 +2004,17 @@ void ZHBMV_(const char *uplo,const f77_int *n,const f77_int *k,const dcomplex void ZHEMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zhemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zhemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void zhemm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zhemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zhemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ZHEMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zhemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zhemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ZHEMV(const char *uplo,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *x,const f77_int *incx,const dcomplex *beta,dcomplex *y,const f77_int 
*incy) @@ -2064,32 +2064,32 @@ void ZHER2_(const char *uplo,const f77_int *n,const dcomplex *alpha,const dcom void ZHER2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const double *beta,dcomplex *c,const f77_int *ldc) { - zher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zher2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const double *beta,dcomplex *c,const f77_int *ldc) { - zher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZHER2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const double *beta,dcomplex *c,const f77_int *ldc) { - zher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZHERK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const dcomplex *a,const f77_int *lda,const double *beta,dcomplex *c,const f77_int *ldc) { - zherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void zherk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const dcomplex *a,const f77_int *lda,const double *beta,dcomplex *c,const f77_int *ldc) { - zherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ZHERK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const dcomplex 
*a,const f77_int *lda,const double *beta,dcomplex *c,const f77_int *ldc) { - zherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ZHPMV(const char *uplo,const f77_int *n,const dcomplex *alpha,const dcomplex *ap,const dcomplex *x,const f77_int *incx,const dcomplex *beta,dcomplex *y,const f77_int *incy) @@ -2184,47 +2184,47 @@ void ZSWAP_(const f77_int *n,dcomplex *zx,const f77_int *incx,dcomplex *zy,const void ZSYMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void zsymm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ZSYMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ZSYR2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void 
zsyr2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZSYR2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZSYRK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void zsyrk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ZSYRK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ZTBMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const f77_int *k,const dcomplex *a,const f77_int *lda,dcomplex *x,const f77_int *incx) @@ -2289,17 +2289,17 @@ void ZTPSV_(const char *uplo,const char *trans,const char *diag,const f77_ void 
ZTRMM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ztrmm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ZTRMM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ZTRMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const dcomplex *a,const f77_int *lda,dcomplex *x,const f77_int *incx) @@ -2319,17 +2319,17 @@ void ZTRMV_(const char *uplo,const char *trans,const char *diag,const f77_ void ZTRSM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ztrsm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } 
void ZTRSM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ZTRSV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const dcomplex *a,const f77_int *lda,dcomplex *x,const f77_int *incx) @@ -2380,17 +2380,17 @@ void CDOTUSUB_( const f77_int* n, const scomplex* x,const f77_int* incxy, const void CGEMM3M( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void cgemm3m( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMM3M_( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMM_BATCH( const f77_char* transa_array, const f77_char* transb_array,const 
f77_int *m_array, const f77_int *n_array, const f77_int *k_array,const scomplex* alpha_array, const scomplex** a_array, const f77_int *lda_array, const scomplex** b_array, const f77_int *ldb_array, const scomplex* beta_array, scomplex** c_array, const f77_int *ldc_array, const f77_int* group_count, const f77_int *group_size) @@ -2410,17 +2410,17 @@ void CGEMM_BATCH_( const f77_char* transa_array, const f77_char* transb_array,co void CGEMMT( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void cgemmt( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha,scomplex* aptr, f77_int* lda, f77_int* ldb) @@ -2545,17 +2545,17 @@ void DGEMM_BATCH_( const f77_char* transa_array, const f77_char* 
transb_array,co void DGEMMT( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const double* alpha, const double* a, const f77_int* lda, const double* b, const f77_int* ldb, const double* beta, double* c, const f77_int* ldc) { - dgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void dgemmt( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const double* alpha, const double* a, const f77_int* lda, const double* b, const f77_int* ldb, const double* beta, double* c, const f77_int* ldc) { - dgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const double* alpha, const double* a, const f77_int* lda, const double* b, const f77_int* ldb, const double* beta, double* c, const f77_int* ldc) { - dgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DNRM2SUB(const f77_int* n, const double* x, const f77_int* incx, double *rval) @@ -2920,17 +2920,17 @@ void SGEMM_BATCH_(const f77_char* transa_array, const f77_char* transb_array,con void SGEMMT( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const float* alpha, const float* a, const f77_int* lda, const float* b, const f77_int* ldb, const float* beta, float* c, const f77_int* ldc) { - sgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void sgemmt( const f77_char* uploc, const f77_char* 
transa, const f77_char* transb, const f77_int* n, const f77_int* k, const float* alpha, const float* a, const f77_int* lda, const float* b, const f77_int* ldb, const float* beta, float* c, const f77_int* ldc) { - sgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const float* alpha, const float* a, const f77_int* lda, const float* b, const f77_int* ldb, const float* beta, float* c, const f77_int* ldc) { - sgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SIMATCOPY( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha,float* aptr, f77_int* lda, f77_int* ldb) @@ -3055,17 +3055,17 @@ void ZDOTUSUB_( const f77_int* n, const dcomplex* x, const f77_int* incx,const d void ZGEMM3M( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zgemm3m( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMM3M_( const f77_char* transa, const f77_char* transb, const f77_int* m, const 
f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMM_BATCH( const f77_char* transa_array, const f77_char* transb_array,const f77_int *m_array, const f77_int *n_array, const f77_int *k_array,const dcomplex* alpha_array, const dcomplex** a_array, const f77_int *lda_array, const dcomplex** b_array, const f77_int *ldb_array, const dcomplex* beta_array, dcomplex** c_array, const f77_int *ldc_array, const f77_int* group_count, const f77_int *group_size) @@ -3085,17 +3085,17 @@ void ZGEMM_BATCH_( const f77_char* transa_array, const f77_char* transb_array,c void ZGEMMT( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zgemmt( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, 
dcomplex* c, const f77_int* ldc) { - zgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha,dcomplex* aptr, f77_int* lda, f77_int* ldb)