BLIS: BLAS3 quick return functionality

Implement netlib BLAS style quick return functionality for when no
work is required. Similar functionality was already in HERK and HER2K
routines.

AMD copyrights updated.

AMD-Internal: [CPUPL-2373]
Change-Id: I0ebe9d76465b0e48b2ff5c2f1cc2a75763fe187c
This commit is contained in:
Edward Smyth
2022-08-09 06:06:07 -04:00
parent 171fb7358d
commit 7f322da01d
11 changed files with 306 additions and 9 deletions

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 22, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -86,6 +86,17 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \
@@ -175,6 +186,17 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \
@@ -344,6 +366,17 @@ void dzgemm_
ldc
);
/* Quick return if possible: there is nothing to compute when m == 0 or
   n == 0, or when alpha == 0 or k == 0 while beta == 1.
   This is a plain function body, not the type-generic macro body, so no
   macro parameter `ch` is in scope; the scalar predicates therefore name
   the 'z' type character explicitly, matching the other dzgemm_ wrapper. */
if ( *m == 0 || *n == 0 || (( PASTEMAC(z,eq0)( *alpha ) || *k == 0)
&& PASTEMAC(z,eq1)( *beta ) ))
{
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
/* Map BLAS chars to their corresponding BLIS enumerated type value. */
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa );
bli_param_map_netlib_to_blis_trans( *transb, &blis_transb );

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2022, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -83,6 +83,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \
@@ -164,6 +174,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 22, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -86,6 +86,17 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \
@@ -175,6 +186,17 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k); \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \
@@ -343,6 +365,16 @@ void dgemm_
ldc
);
/* Quick return if possible. */
if ( *m == 0 || *n == 0 || ((*alpha == 0.0 || *k == 0) && *beta == 1.0))
{
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
/* Map BLAS chars to their corresponding BLIS enumerated type value. */
bli_param_map_netlib_to_blis_trans(*transa, &blis_transa);
bli_param_map_netlib_to_blis_trans(*transb, &blis_transb);
@@ -666,6 +698,17 @@ void zgemm_
ldc
);
/* Quick return if possible. */
if ( *m == 0 || *n == 0 || (( PASTEMAC(z,eq0)( *alpha ) || *k == 0)
&& PASTEMAC(z,eq1)( *beta ) ))
{
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
/* Map BLAS chars to their corresponding BLIS enumerated type value. */
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa );
bli_param_map_netlib_to_blis_trans( *transb, &blis_transb );
@@ -918,6 +961,17 @@ void dzgemm_
ldc
);
/* Quick return if possible. */
if ( *m == 0 || *n == 0 || (( PASTEMAC(z,eq0)( *alpha ) || *k == 0)
&& PASTEMAC(z,eq1)( *beta ) ))
{
AOCL_DTL_LOG_GEMM_STATS(AOCL_DTL_LEVEL_TRACE_1, *m, *n, *k);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
/* Map BLAS chars to their corresponding BLIS enumerated type value. */
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa );
bli_param_map_netlib_to_blis_trans( *transb, &blis_transb );

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020, Advanced Micro Devices, Inc.
Copyright (C) 2020 - 2022, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -84,6 +84,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \
@@ -170,6 +180,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *n == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -84,6 +84,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || ( PASTEMAC(ch,eq0)( *alpha ) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \
@@ -165,6 +175,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || ( PASTEMAC(ch,eq0)( *alpha ) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -83,6 +83,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || ( PASTEMAC(ch,eq0)( *alpha ) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \
@@ -163,6 +173,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 || ( PASTEMAC(ch,eq0)( *alpha ) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin.
Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc.All Rights Reserved.
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc.All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -83,6 +83,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \
@@ -172,6 +182,16 @@ void PASTEF77(ch,blasname) \
ldb, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin.
Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc.All Rights Reserved.
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc.All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -81,6 +81,16 @@ void PASTEF77(ch,blasname) \
lda, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \
@@ -164,6 +174,16 @@ void PASTEF77(ch,blasname) \
lda, \
ldc \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || (( PASTEMAC(ch,eq0)( *alpha ) || *k == 0) \
&& PASTEMAC(ch,eq1)( *beta ) )) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin.
Copyright (C) 2019 - 2021, Advanced Micro Devices, Inc.All Rights Reserved.
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc.All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -86,6 +86,15 @@ void PASTEF77(ch,blasname) \
lda, \
ldb \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \
@@ -168,6 +177,15 @@ void PASTEF77(ch,blasname) \
lda, \
ldb \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \

View File

@@ -85,6 +85,15 @@ void PASTEF77(ch,blasname) \
lda, \
ldb \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \
@@ -169,6 +178,15 @@ void PASTEF77(ch,blasname) \
lda, \
ldb \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \

View File

@@ -85,6 +85,15 @@ void PASTEF77(ch,blasname) \
lda, \
ldb \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \
@@ -169,6 +178,15 @@ void PASTEF77(ch,blasname) \
lda, \
ldb \
); \
\
/* Quick return if possible. */ \
if ( *m == 0 || *n == 0 ) \
{ \
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
return; \
} \
\
/* Map BLAS chars to their corresponding BLIS enumerated type value. */ \
bli_param_map_netlib_to_blis_side( *side, &blis_side ); \
@@ -424,6 +442,15 @@ void strsm_
ldb
);
/* Quick return if possible. */
if ( *m == 0 || *n == 0 )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
/* Map BLAS chars to their corresponding BLIS enumerated type value. */
bli_param_map_netlib_to_blis_side( *side, &blis_side );
bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa );
@@ -686,6 +713,15 @@ void dtrsm_
ldb
);
/* Quick return if possible. */
if ( *m == 0 || *n == 0 )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
/* Map BLAS chars to their corresponding BLIS enumerated type value. */
bli_param_map_netlib_to_blis_side( *side, &blis_side );
bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa );
@@ -982,6 +1018,15 @@ void ztrsm_
ldb
);
/* Quick return if possible. */
if ( *m == 0 || *n == 0 )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
/* Map BLAS chars to their corresponding BLIS enumerated type value. */
bli_param_map_netlib_to_blis_side( *side, &blis_side );
bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa );
@@ -1308,6 +1353,15 @@ void ctrsm_
ldb
);
/* Quick return if possible. */
if ( *m == 0 || *n == 0 )
{
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_INFO);
/* Finalize BLIS. */
bli_finalize_auto();
return;
}
/* Map BLAS chars to their corresponding BLIS enumerated type value. */
bli_param_map_netlib_to_blis_side( *side, &blis_side );
bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa );