diff --git a/frame/compat/bla_gemm.c b/frame/compat/bla_gemm.c index d3601952b..ae14196e8 100644 --- a/frame/compat/bla_gemm.c +++ b/frame/compat/bla_gemm.c @@ -44,7 +44,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -136,14 +136,30 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -318,7 +334,24 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ + #endif #ifdef BLIS_ENABLE_BLAS diff --git a/frame/compat/bla_gemm.h b/frame/compat/bla_gemm.h index c9ea83149..d8fe6ddb9 100644 --- a/frame/compat/bla_gemm.h +++ b/frame/compat/bla_gemm.h @@ -41,6 +41,19 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ diff --git a/frame/compat/bla_gemm3m.c b/frame/compat/bla_gemm3m.c index 665c8643d..4ecbba555 100644 --- a/frame/compat/bla_gemm3m.c +++ b/frame/compat/bla_gemm3m.c @@ -44,7 +44,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -131,14 +131,30 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -240,7 +256,23 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_gemm3m.h b/frame/compat/bla_gemm3m.h index 1063d85c0..d64e3f199 100644 --- a/frame/compat/bla_gemm3m.h +++ b/frame/compat/bla_gemm3m.h @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2020-2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +40,19 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ diff --git a/frame/compat/bla_gemm_amd.c b/frame/compat/bla_gemm_amd.c index 2a9dcb99d..6bc0dcd55 100644 --- a/frame/compat/bla_gemm_amd.c +++ b/frame/compat/bla_gemm_amd.c @@ -44,7 +44,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -136,14 +136,32 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ +\ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ @@ -318,11 +336,30 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +\ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ +\ + PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ + #endif #ifdef BLIS_ENABLE_BLAS -void dgemm_ +void dgemm_blis_impl ( const f77_char* transa, const f77_char* transb, @@ -658,7 +695,24 @@ void dgemm_ bli_finalize_auto(); } // end of dgemm_ -void zgemm_ +void dgemm_ +( + const f77_char* transa, + const f77_char* transb, + const f77_int* m, + const f77_int* n, + const f77_int* k, + const double* alpha, + const double* a, const f77_int* lda, + const double* b, const f77_int* ldb, + const double* beta, + double* c, const f77_int* ldc +) +{ + dgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void zgemm_blis_impl ( const f77_char* transa, const f77_char* transb, @@ -915,6 +969,22 @@ void zgemm_ bli_finalize_auto(); }// end of zgemm_ +void zgemm_ + ( + const f77_char* transa, + const f77_char* transb, + const f77_int* m, + const f77_int* n, + const f77_int* k, + const dcomplex* alpha, + const dcomplex* a, const f77_int* lda, + const dcomplex* b, const f77_int* ldb, + const dcomplex* beta, + dcomplex* c, const f77_int* ldc + ) +{ + zgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} INSERT_GENTFUNC_BLAS_SC( gemm, gemm ) diff --git a/frame/compat/bla_gemmt.c b/frame/compat/bla_gemmt.c index 7abad40ac..f8f6fa2de 100644 --- a/frame/compat/bla_gemmt.c +++ b/frame/compat/bla_gemmt.c @@ -44,7 +44,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -134,14 +134,30 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -247,7 +263,23 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_gemmt.h b/frame/compat/bla_gemmt.h index 8043d6829..d4efb995c 100644 --- a/frame/compat/bla_gemmt.h +++ b/frame/compat/bla_gemmt.h @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020-2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +40,19 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_hemm.c b/frame/compat/bla_hemm.c index 0e003012d..ed3cbb517 100644 --- a/frame/compat/bla_hemm.c +++ b/frame/compat/bla_hemm.c @@ -45,7 +45,7 @@ #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -132,14 +132,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -248,7 +263,22 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #endif diff --git a/frame/compat/bla_hemm.h b/frame/compat/bla_hemm.h index 711877ede..7054be7c9 100644 --- a/frame/compat/bla_hemm.h +++ b/frame/compat/bla_hemm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ diff --git a/frame/compat/bla_her2k.c b/frame/compat/bla_her2k.c index e21a2cda4..cba6432eb 100755 --- a/frame/compat/bla_her2k.c +++ b/frame/compat/bla_her2k.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -45,7 +45,7 @@ #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -137,14 +137,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -258,7 +273,22 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #endif diff --git a/frame/compat/bla_her2k.h b/frame/compat/bla_her2k.h index c771f78d4..a3fa41302 100644 --- a/frame/compat/bla_her2k.h +++ b/frame/compat/bla_her2k.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_herk.c b/frame/compat/bla_herk.c index 36188e6a6..b07ee180c 100755 --- a/frame/compat/bla_herk.c +++ b/frame/compat/bla_herk.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -45,7 +45,7 @@ #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -131,14 +131,28 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ + } \ #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -242,7 +256,21 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_herk.h b/frame/compat/bla_herk.h index e649a74ab..8ec9183e8 100644 --- a/frame/compat/bla_herk.h +++ b/frame/compat/bla_herk.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,17 @@ #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_symm.c b/frame/compat/bla_symm.c index 85aebb435..7b915a5ed 100755 --- a/frame/compat/bla_symm.c +++ b/frame/compat/bla_symm.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -131,14 +131,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -246,7 +261,22 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_symm.h b/frame/compat/bla_symm.h index b186e4b43..f10e1cbb8 100644 --- a/frame/compat/bla_symm.h +++ b/frame/compat/bla_symm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ diff --git a/frame/compat/bla_syr2k.c b/frame/compat/bla_syr2k.c index 6a4f31b96..751e008ae 100644 --- a/frame/compat/bla_syr2k.c +++ b/frame/compat/bla_syr2k.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -139,14 +139,29 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ + } \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -262,7 +277,22 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_syr2k.h b/frame/compat/bla_syr2k.h index 91d9a3acf..fc127d9ea 100644 --- a/frame/compat/bla_syr2k.h +++ b/frame/compat/bla_syr2k.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_syrk.c b/frame/compat/bla_syrk.c index 376b23aec..b2ec611f5 100644 --- a/frame/compat/bla_syrk.c +++ b/frame/compat/bla_syrk.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -133,14 +133,28 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ @@ -245,7 +259,21 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ +} \ #endif diff --git a/frame/compat/bla_syrk.h b/frame/compat/bla_syrk.h index b6ca938a6..c87dc6694 100644 --- a/frame/compat/bla_syrk.h +++ b/frame/compat/bla_syrk.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,17 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ diff --git a/frame/compat/bla_trmm.c b/frame/compat/bla_trmm.c index c319b3ab5..59c64b90e 100644 --- a/frame/compat/bla_trmm.c +++ b/frame/compat/bla_trmm.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -131,14 +131,29 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -239,7 +254,22 @@ void PASTEF77(ch,blasname) \ /* Finalize BLIS. */ \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1) \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #endif diff --git a/frame/compat/bla_trmm.h b/frame/compat/bla_trmm.h index 4f0c20b1b..10cbb6cbc 100644 --- a/frame/compat/bla_trmm.h +++ b/frame/compat/bla_trmm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ diff --git a/frame/compat/bla_trsm.c b/frame/compat/bla_trsm.c index e99805d8d..f709a8cd0 100644 --- a/frame/compat/bla_trsm.c +++ b/frame/compat/bla_trsm.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -130,14 +130,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -393,7 +408,22 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #endif diff --git a/frame/compat/bla_trsm.h b/frame/compat/bla_trsm.h index 5694db52a..af1b626df 100644 --- a/frame/compat/bla_trsm.h +++ b/frame/compat/bla_trsm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -40,6 +41,18 @@ #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ); \ +BLIS_EXPORT_BLAS void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ diff --git a/frame/compat/bla_trsm_amd.c b/frame/compat/bla_trsm_amd.c index 8ca7434bd..4479725fb 100644 --- a/frame/compat/bla_trsm_amd.c +++ b/frame/compat/bla_trsm_amd.c @@ -45,7 +45,7 @@ #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -130,14 +130,29 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ + } \ #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ -void PASTEF77(ch,blasname) \ +void PASTEF77S(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ @@ -393,13 +408,28 @@ void PASTEF77(ch,blasname) \ AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO) \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ -} +} \ +void PASTEF77(ch,blasname) \ + ( \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ + ) \ +{ \ + PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ +} \ #endif #ifdef BLIS_ENABLE_BLAS -void strsm_ +void strsm_blis_impl ( const f77_char* side, const f77_char* uploa, @@ -669,8 +699,23 @@ void strsm_ /* Finalize BLIS. */ bli_finalize_auto(); } +void strsm_ +( + const f77_char* side, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const float* alpha, + const float* a, const f77_int* lda, + float* b, const f77_int* ldb +) +{ + strsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); +} -void dtrsm_ +void dtrsm_blis_impl ( const f77_char* side, const f77_char* uploa, @@ -892,7 +937,7 @@ void dtrsm_ bli_obj_set_conjtrans( blis_transa, &ao ); bli_obj_set_struc( struca, &ao ); - + #ifdef BLIS_ENABLE_SMALL_MATRIX_TRSM // This function is invoked on all architectures including ‘generic’. // Non-AVX platforms will use the kernels derived from the context. @@ -973,9 +1018,24 @@ void dtrsm_ /* Finalize BLIS. */ bli_finalize_auto(); } +void dtrsm_ +( + const f77_char* side, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const double* alpha, + const double* a, const f77_int* lda, + double* b, const f77_int* ldb +) +{ + dtrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); +} -void ztrsm_ +void ztrsm_blis_impl ( const f77_char* side, const f77_char* uploa, @@ -1331,9 +1391,24 @@ void ztrsm_ /* Finalize BLIS. */ bli_finalize_auto(); } +void ztrsm_ +( + const f77_char* side, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const dcomplex* alpha, + const dcomplex* a, const f77_int* lda, + dcomplex* b, const f77_int* ldb +) +{ + ztrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); +} -void ctrsm_ +void ctrsm_blis_impl ( const f77_char* side, const f77_char* uploa, @@ -1664,5 +1739,20 @@ void ctrsm_ /* Finalize BLIS. */ bli_finalize_auto(); } +void ctrsm_ +( + const f77_char* side, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const scomplex* alpha, + const scomplex* a, const f77_int* lda, + scomplex* b, const f77_int* ldb +) +{ + ctrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); +} #endif diff --git a/frame/compat/cblas/src/cblas_f77.h b/frame/compat/cblas/src/cblas_f77.h index fabf3efb1..5ec518de9 100644 --- a/frame/compat/cblas/src/cblas_f77.h +++ b/frame/compat/cblas/src/cblas_f77.h @@ -7,7 +7,7 @@ * * (Heavily hacked down from the original) * - * Copyright (C) 2020 - 2021, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2020 - 2022, Advanced Micro Devices, Inc. All rights reserved. * */ @@ -326,40 +326,40 @@ /* * Level 3 BLAS */ -#define F77_chemm chemm_ -#define F77_cherk cherk_ -#define F77_cher2k cher2k_ -#define F77_zhemm zhemm_ -#define F77_zherk zherk_ -#define F77_zher2k zher2k_ -#define F77_sgemm sgemm_ -#define F77_ssymm ssymm_ -#define F77_ssyrk ssyrk_ -#define F77_ssyr2k ssyr2k_ -#define F77_strmm strmm_ -#define F77_strsm strsm_ -#define F77_dgemm dgemm_ -#define F77_dsymm dsymm_ -#define F77_dsyrk dsyrk_ -#define F77_dsyr2k dsyr2k_ -#define F77_dtrmm dtrmm_ -#define F77_dtrsm dtrsm_ -#define F77_cgemm cgemm_ -#define F77_csymm csymm_ -#define F77_csyrk csyrk_ -#define F77_csyr2k csyr2k_ -#define F77_ctrmm ctrmm_ -#define F77_ctrsm ctrsm_ -#define F77_zgemm zgemm_ -#define F77_zsymm zsymm_ -#define F77_zsyrk zsyrk_ -#define F77_zsyr2k zsyr2k_ -#define F77_ztrmm ztrmm_ -#define F77_ztrsm ztrsm_ -#define F77_dgemmt dgemmt_ -#define F77_sgemmt sgemmt_ -#define F77_cgemmt cgemmt_ -#define F77_zgemmt zgemmt_ +#define F77_chemm chemm_blis_impl +#define F77_cherk cherk_blis_impl +#define F77_cher2k cher2k_blis_impl +#define F77_zhemm zhemm_blis_impl +#define F77_zherk zherk_blis_impl +#define F77_zher2k zher2k_blis_impl +#define F77_sgemm sgemm_blis_impl +#define F77_ssymm ssymm_blis_impl +#define F77_ssyrk ssyrk_blis_impl +#define F77_ssyr2k ssyr2k_blis_impl +#define F77_strmm strmm_blis_impl +#define F77_strsm strsm_blis_impl +#define F77_dgemm dgemm_blis_impl +#define F77_dsymm dsymm_blis_impl +#define F77_dsyrk dsyrk_blis_impl +#define F77_dsyr2k dsyr2k_blis_impl +#define F77_dtrmm dtrmm_blis_impl +#define F77_dtrsm dtrsm_blis_impl +#define F77_cgemm cgemm_blis_impl +#define F77_csymm csymm_blis_impl +#define F77_csyrk csyrk_blis_impl +#define F77_csyr2k csyr2k_blis_impl +#define F77_ctrmm ctrmm_blis_impl +#define F77_ctrsm ctrsm_blis_impl +#define F77_zgemm zgemm_blis_impl +#define F77_zsymm zsymm_blis_impl +#define F77_zsyrk zsyrk_blis_impl +#define F77_zsyr2k zsyr2k_blis_impl +#define F77_ztrmm ztrmm_blis_impl +#define F77_ztrsm ztrsm_blis_impl +#define F77_dgemmt dgemmt_blis_impl +#define F77_sgemmt sgemmt_blis_impl +#define F77_cgemmt cgemmt_blis_impl +#define F77_zgemmt zgemmt_blis_impl /* * Aux Function @@ -375,8 +375,8 @@ #define F77_daxpby daxpby_ #define F77_caxpby caxpby_ #define F77_zaxpby zaxpby_ -#define F77_cgemm3m cgemm3m_ -#define F77_zgemm3m zgemm3m_ +#define F77_cgemm3m cgemm3m_blis_impl +#define F77_zgemm3m zgemm3m_blis_impl #define F77_isamin_sub isaminsub_ #define F77_idamin_sub idaminsub_ diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index f29fdc1fe..75b9c9fdc 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018-2021, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -162,11 +162,13 @@ #define PASTEF77(ch1,name) ch1 ## name #define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name #define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name +#define PASTEF77S(ch1,name) ch1 ## name ## _blis_impl #else #define PASTEF770(name) name ## _ #define PASTEF77(ch1,name) ch1 ## name ## _ #define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name ## _ #define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name ## _ +#define PASTEF77S(ch1,name) ch1 ## name ## _blis_impl #endif // -- Include other groups of macros diff --git a/frame/util/bli_util_api_wrap.c b/frame/util/bli_util_api_wrap.c index 81300761f..9e8d1ccc3 100644 --- a/frame/util/bli_util_api_wrap.c +++ b/frame/util/bli_util_api_wrap.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2021-2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -210,17 +210,17 @@ void CGBMV_(const char *trans,const f77_int *m,const f77_int *n,const f77_int void CGEMM(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - cgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void cgemm(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - cgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMM_(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - cgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMV(const char *trans,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *x,const f77_int *incx,const scomplex *beta,scomplex *y,const f77_int *incy) @@ -285,17 +285,17 @@ void CHBMV_(const char *uplo,const f77_int *n,const f77_int *k,const scomplex void CHEMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - chemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + chemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void chemm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - chemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + chemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void CHEMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - chemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + chemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void CHEMV(const char *uplo,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *x,const f77_int *incx,const scomplex *beta,scomplex *y,const f77_int *incy) @@ -345,32 +345,32 @@ void CHER2_(const char *uplo,const f77_int *n,const scomplex *alpha,const sco void CHER2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const float *beta,scomplex *c,const f77_int *ldc) { - cher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void cher2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const float *beta,scomplex *c,const f77_int *ldc) { - cher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CHER2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const float *beta,scomplex *c,const f77_int *ldc) { - cher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CHERK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const scomplex *a,const f77_int *lda,const float *beta,scomplex *c,const f77_int *ldc) { - cherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + cherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void cherk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const scomplex *a,const f77_int *lda,const float *beta,scomplex *c,const f77_int *ldc) { - cherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + cherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void CHERK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const scomplex *a,const f77_int *lda,const float *beta,scomplex *c,const f77_int *ldc) { - cherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + cherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void CHPMV(const char *uplo,const f77_int *n,const scomplex *alpha,const scomplex *ap,const scomplex *x,const f77_int *incx,const scomplex *beta,scomplex *y,const f77_int *incy) @@ -495,47 +495,47 @@ void CSWAP_(const f77_int *n,scomplex *cx,const f77_int *incx,scomplex *cy,con void CSYMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + csymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void csymm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + csymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void CSYMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + csymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void CSYR2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + csyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void csyr2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + csyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CSYR2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *b,const f77_int *ldb,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + csyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CSYRK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + csyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void csyrk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + csyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void CSYRK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const scomplex *alpha,const scomplex *a,const f77_int *lda,const scomplex *beta,scomplex *c,const f77_int *ldc) { - csyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + csyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void CTBMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const f77_int *k,const scomplex *a,const f77_int *lda,scomplex *x,const f77_int *incx) @@ -600,17 +600,17 @@ void CTPSV_(const char *uplo,const char *trans,const char *diag,const f77_ void CTRMM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ctrmm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void CTRMM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void CTRMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const scomplex *a,const f77_int *lda,scomplex *x,const f77_int *incx) @@ -630,17 +630,17 @@ void CTRMV_(const char *uplo,const char *trans,const char *diag,const f77_ void CTRSM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ctrsm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void CTRSM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const scomplex *alpha,const scomplex *a,const f77_int *lda,scomplex *b,const f77_int *ldb) { - ctrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ctrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void CTRSV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const scomplex *a,const f77_int *lda,scomplex *x,const f77_int *incx) @@ -750,17 +750,17 @@ void DGBMV_(const char *trans,const f77_int *m,const f77_int *n,const f77_int void DGEMM(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void dgemm(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DGEMM_(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DGEMV(const char *trans,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *x,const f77_int *incx,const double *beta,double *y,const f77_int *incy) @@ -975,17 +975,17 @@ void DSWAP_(const f77_int *n,double *dx,const f77_int *incx,double *dy,const f77 void DSYMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + dsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void dsymm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + dsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void DSYMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + dsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void DSYMV(const char *uplo,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,const double *x,const f77_int *incx,const double *beta,double *y,const f77_int *incy) @@ -1035,32 +1035,32 @@ void DSYR2_(const char *uplo,const f77_int *n,const double *alpha,const double void DSYR2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void dsyr2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DSYR2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *b,const f77_int *ldb,const double *beta,double *c,const f77_int *ldc) { - dsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DSYRK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *beta,double *c,const f77_int *ldc) { - dsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + dsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void dsyrk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *beta,double *c,const f77_int *ldc) { - dsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + dsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void DSYRK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const double *a,const f77_int *lda,const double *beta,double *c,const f77_int *ldc) { - dsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + dsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void DTBMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const f77_int *k,const double *a,const f77_int *lda,double *x,const f77_int *incx) @@ -1125,17 +1125,17 @@ void DTPSV_(const char *uplo,const char *trans,const char *diag,const f77_ void DTRMM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void dtrmm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void DTRMM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void DTRMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const double *a,const f77_int *lda,double *x,const f77_int *incx) @@ -1155,17 +1155,17 @@ void DTRMV_(const char *uplo,const char *trans,const char *diag,const f77_ void DTRSM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void dtrsm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void DTRSM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const double *alpha,const double *a,const f77_int *lda,double *b,const f77_int *ldb) { - dtrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + dtrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void DTRSV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const double *a,const f77_int *lda,double *x,const f77_int *incx) @@ -1417,17 +1417,17 @@ void SGBMV_(const char *trans,const f77_int *m,const f77_int *n,const f77_int void SGEMM(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - sgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void sgemm(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - sgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SGEMM_(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - sgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SGEMV(const char *trans,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *x,const f77_int *incx,const float *beta,float *y,const f77_int *incy) @@ -1629,17 +1629,17 @@ void SSWAP_(const f77_int *n,float *sx,const f77_int *incx,float *sy,const f77 void SSYMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + ssymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ssymm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + ssymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void SSYMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + ssymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void SSYMV(const char *uplo,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,const float *x,const f77_int *incx,const float *beta,float *y,const f77_int *incy) @@ -1689,32 +1689,32 @@ void SSYR2_(const char *uplo,const f77_int *n,const float *alpha,const float void SSYR2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + ssyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ssyr2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + ssyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SSYR2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *b,const f77_int *ldb,const float *beta,float *c,const f77_int *ldc) { - ssyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + ssyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SSYRK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *beta,float *c,const f77_int *ldc) { - ssyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + ssyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ssyrk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *beta,float *c,const f77_int *ldc) { - ssyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + ssyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void SSYRK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const float *alpha,const float *a,const f77_int *lda,const float *beta,float *c,const f77_int *ldc) { - ssyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + ssyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void STBMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const f77_int *k,const float *a,const f77_int *lda,float *x,const f77_int *incx) @@ -1779,17 +1779,17 @@ void STPSV_(const char *uplo,const char *trans,const char *diag,const f77_ void STRMM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void strmm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void STRMM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void STRMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const float *a,const f77_int *lda,float *x,const f77_int *incx) @@ -1809,17 +1809,17 @@ void STRMV_(const char *uplo,const char *trans,const char *diag,const f77_ void STRSM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void strsm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void STRSM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const float *alpha,const float *a,const f77_int *lda,float *b,const f77_int *ldb) { - strsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + strsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void STRSV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const float *a,const f77_int *lda,float *x,const f77_int *incx) @@ -1929,17 +1929,17 @@ void ZGBMV_(const char *trans,const f77_int *m,const f77_int *n,const f77_int void ZGEMM(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zgemm(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMM_(const char *transa,const char *transb,const f77_int *m,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zgemm_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMV(const char *trans,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *x,const f77_int *incx,const dcomplex *beta,dcomplex *y,const f77_int *incy) @@ -2004,17 +2004,17 @@ void ZHBMV_(const char *uplo,const f77_int *n,const f77_int *k,const dcomplex void ZHEMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zhemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zhemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void zhemm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zhemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zhemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ZHEMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zhemm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zhemm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ZHEMV(const char *uplo,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *x,const f77_int *incx,const dcomplex *beta,dcomplex *y,const f77_int *incy) @@ -2064,32 +2064,32 @@ void ZHER2_(const char *uplo,const f77_int *n,const dcomplex *alpha,const dcom void ZHER2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const double *beta,dcomplex *c,const f77_int *ldc) { - zher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zher2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const double *beta,dcomplex *c,const f77_int *ldc) { - zher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZHER2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const double *beta,dcomplex *c,const f77_int *ldc) { - zher2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zher2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZHERK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const dcomplex *a,const f77_int *lda,const double *beta,dcomplex *c,const f77_int *ldc) { - zherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void zherk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const dcomplex *a,const f77_int *lda,const double *beta,dcomplex *c,const f77_int *ldc) { - zherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ZHERK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const double *alpha,const dcomplex *a,const f77_int *lda,const double *beta,dcomplex *c,const f77_int *ldc) { - zherk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zherk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ZHPMV(const char *uplo,const f77_int *n,const dcomplex *alpha,const dcomplex *ap,const dcomplex *x,const f77_int *incx,const dcomplex *beta,dcomplex *y,const f77_int *incy) @@ -2184,47 +2184,47 @@ void ZSWAP_(const f77_int *n,dcomplex *zx,const f77_int *incx,dcomplex *zy,const void ZSYMM(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void zsymm(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ZSYMM_(const char *side,const char *uplo,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsymm_( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); + zsymm_blis_impl( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void ZSYR2K(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zsyr2k(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZSYR2K_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *b,const f77_int *ldb,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyr2k_( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zsyr2k_blis_impl( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZSYRK(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void zsyrk(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ZSYRK_(const char *uplo,const char *trans,const f77_int *n,const f77_int *k,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,const dcomplex *beta,dcomplex *c,const f77_int *ldc) { - zsyrk_( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); + zsyrk_blis_impl( uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } void ZTBMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const f77_int *k,const dcomplex *a,const f77_int *lda,dcomplex *x,const f77_int *incx) @@ -2289,17 +2289,17 @@ void ZTPSV_(const char *uplo,const char *trans,const char *diag,const f77_ void ZTRMM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ztrmm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ZTRMM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrmm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrmm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ZTRMV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const dcomplex *a,const f77_int *lda,dcomplex *x,const f77_int *incx) @@ -2319,17 +2319,17 @@ void ZTRMV_(const char *uplo,const char *trans,const char *diag,const f77_ void ZTRSM(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ztrsm(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ZTRSM_(const char *side,const char *uplo,const char *transa,const char *diag,const f77_int *m,const f77_int *n,const dcomplex *alpha,const dcomplex *a,const f77_int *lda,dcomplex *b,const f77_int *ldb) { - ztrsm_( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); + ztrsm_blis_impl( side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } void ZTRSV(const char *uplo,const char *trans,const char *diag,const f77_int *n,const dcomplex *a,const f77_int *lda,dcomplex *x,const f77_int *incx) @@ -2380,17 +2380,17 @@ void CDOTUSUB_( const f77_int* n, const scomplex* x,const f77_int* incxy, const void CGEMM3M( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void cgemm3m( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMM3M_( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMM_BATCH( const f77_char* transa_array, const f77_char* transb_array,const f77_int *m_array, const f77_int *n_array, const f77_int *k_array,const scomplex* alpha_array, const scomplex** a_array, const f77_int *lda_array, const scomplex** b_array, const f77_int *ldb_array, const scomplex* beta_array, scomplex** c_array, const f77_int *ldc_array, const f77_int* group_count, const f77_int *group_size) @@ -2410,17 +2410,17 @@ void CGEMM_BATCH_( const f77_char* transa_array, const f77_char* transb_array,co void CGEMMT( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void cgemmt( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const scomplex* alpha, const scomplex* a, const f77_int* lda, const scomplex* b, const f77_int* ldb, const scomplex* beta, scomplex* c, const f77_int* ldc) { - cgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + cgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void CIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const scomplex* alpha,scomplex* aptr, f77_int* lda, f77_int* ldb) @@ -2545,17 +2545,17 @@ void DGEMM_BATCH_( const f77_char* transa_array, const f77_char* transb_array,co void DGEMMT( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const double* alpha, const double* a, const f77_int* lda, const double* b, const f77_int* ldb, const double* beta, double* c, const f77_int* ldc) { - dgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void dgemmt( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const double* alpha, const double* a, const f77_int* lda, const double* b, const f77_int* ldb, const double* beta, double* c, const f77_int* ldc) { - dgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const double* alpha, const double* a, const f77_int* lda, const double* b, const f77_int* ldb, const double* beta, double* c, const f77_int* ldc) { - dgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + dgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void DNRM2SUB(const f77_int* n, const double* x, const f77_int* incx, double *rval) @@ -2920,17 +2920,17 @@ void SGEMM_BATCH_(const f77_char* transa_array, const f77_char* transb_array,con void SGEMMT( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const float* alpha, const float* a, const f77_int* lda, const float* b, const f77_int* ldb, const float* beta, float* c, const f77_int* ldc) { - sgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void sgemmt( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const float* alpha, const float* a, const f77_int* lda, const float* b, const f77_int* ldb, const float* beta, float* c, const f77_int* ldc) { - sgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const float* alpha, const float* a, const f77_int* lda, const float* b, const f77_int* ldb, const float* beta, float* c, const f77_int* ldc) { - sgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + sgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void SIMATCOPY( f77_char* trans, f77_int* rows, f77_int* cols, const float* alpha,float* aptr, f77_int* lda, f77_int* ldb) @@ -3055,17 +3055,17 @@ void ZDOTUSUB_( const f77_int* n, const dcomplex* x, const f77_int* incx,const d void ZGEMM3M( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zgemm3m( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMM3M_( const f77_char* transa, const f77_char* transb, const f77_int* m, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemm3m_( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemm3m_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMM_BATCH( const f77_char* transa_array, const f77_char* transb_array,const f77_int *m_array, const f77_int *n_array, const f77_int *k_array,const dcomplex* alpha_array, const dcomplex** a_array, const f77_int *lda_array, const dcomplex** b_array, const f77_int *ldb_array, const dcomplex* beta_array, dcomplex** c_array, const f77_int *ldc_array, const f77_int* group_count, const f77_int *group_size) @@ -3085,17 +3085,17 @@ void ZGEMM_BATCH_( const f77_char* transa_array, const f77_char* transb_array,c void ZGEMMT( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void zgemmt( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZGEMMT_( const f77_char* uploc, const f77_char* transa, const f77_char* transb, const f77_int* n, const f77_int* k, const dcomplex* alpha, const dcomplex* a, const f77_int* lda, const dcomplex* b, const f77_int* ldb, const dcomplex* beta, dcomplex* c, const f77_int* ldc) { - zgemmt_( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + zgemmt_blis_impl( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void ZIMATCOPY(f77_char* trans, f77_int* rows, f77_int* cols, const dcomplex* alpha,dcomplex* aptr, f77_int* lda, f77_int* ldb)