diff --git a/kernels/haswell/3/sup/s6x16/bli_gemmsup_rv_haswell_asm_sMx6.c b/kernels/haswell/3/sup/s6x16/bli_gemmsup_rv_haswell_asm_sMx6.c index ac4e1ee0b..1d80111ea 100644 --- a/kernels/haswell/3/sup/s6x16/bli_gemmsup_rv_haswell_asm_sMx6.c +++ b/kernels/haswell/3/sup/s6x16/bli_gemmsup_rv_haswell_asm_sMx6.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019, Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -662,6 +662,9 @@ void bli_sgemmsup_rv_haswell_asm_6x6 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", + "ymm12", "ymm14", "memory" ) } @@ -1236,6 +1239,9 @@ void bli_sgemmsup_rv_haswell_asm_5x6 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", + "ymm12", "memory" ) } @@ -1723,6 +1729,8 @@ void bli_sgemmsup_rv_haswell_asm_4x6 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", "memory" ) } @@ -2211,6 +2219,8 @@ void bli_sgemmsup_rv_haswell_asm_3x6 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", "ymm4", + "ymm6", "ymm8", "memory" ) } @@ -2611,6 +2621,8 @@ void bli_sgemmsup_rv_haswell_asm_2x6 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", "ymm4", + "ymm6", "memory" ) } @@ -3000,6 +3012,7 @@ void bli_sgemmsup_rv_haswell_asm_1x6 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", "ymm4", "memory" ) } @@ -3389,6 +3402,7 @@ void bli_sgemmsup_rv_haswell_asm_1x6 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", "ymm4", "memory" ) } diff --git a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16.c b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16.c index c309c8c0c..3c47a910b 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16.c +++ b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16.c @@ -2,8 +2,10 @@ BLIS An object-based framework for developing high-performance BLAS-like libraries. + Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020 - 2022 , Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -15,6 +17,7 @@ - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -27,7 +30,9 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #include "blis.h" + #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* @@ -328,6 +333,9 @@ void bli_sgemmsup_rd_zen_asm_2x16 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm7", "ymm8", + "ymm10", "ymm11", "ymm13", "ymm14", "memory" ) } @@ -560,6 +568,8 @@ void bli_sgemmsup_rd_zen_asm_1x16 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", "ymm4", + "ymm7", "ymm10", "ymm13", "memory" ) } @@ -858,6 +868,9 @@ void bli_sgemmsup_rd_zen_asm_2x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm7", "ymm8", + "ymm10", "ymm11", "ymm13", "ymm14", "memory" ) } @@ -1088,6 +1101,8 @@ void bli_sgemmsup_rd_zen_asm_1x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", "ymm4", + "ymm7", "ymm10", "ymm13", "memory" ) } @@ -1354,6 +1369,9 @@ void bli_sgemmsup_rd_zen_asm_2x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm7", "ymm8", + "ymm10", "ymm11", "ymm13", "ymm14", "memory" ) } @@ -1568,6 +1586,8 @@ void bli_sgemmsup_rd_zen_asm_1x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", "ymm4", + "ymm7", "ymm10", "ymm13", "memory" ) } @@ -1792,6 +1812,8 @@ void bli_sgemmsup_rd_zen_asm_2x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm3", "ymm4", + "ymm5", "ymm6", "ymm7", "memory" ) } @@ -1979,6 +2001,8 @@ void bli_sgemmsup_rd_zen_asm_1x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm3", "ymm4", + "ymm5", "memory" ) } @@ -2370,6 +2394,10 @@ void bli_sgemmsup_rd_zen_asm_6x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm3", "ymm4", + "ymm5", "ymm6", "ymm7", "ymm8", + "ymm9", "ymm10", "ymm11", "ymm12", + "ymm13", "ymm14", "ymm15", "memory" ) consider_edge_cases: @@ -2664,6 +2692,9 @@ void bli_sgemmsup_rd_zen_asm_3x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm3", "ymm4", + "ymm5", "ymm6", "ymm7", "ymm8", + "ymm9", "memory" ) } diff --git a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16m.c b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16m.c index 00773b3b5..6d1d001b5 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16m.c +++ b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16m.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -556,6 +556,10 @@ void bli_sgemmsup_rd_zen_asm_6x16m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) consider_edge_cases: @@ -1035,6 +1039,10 @@ void bli_sgemmsup_rd_zen_asm_6x8m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) consider_edge_cases: @@ -1517,6 +1525,10 @@ void bli_sgemmsup_rd_zen_asm_6x4m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) consider_edge_cases: @@ -1923,6 +1935,10 @@ void bli_sgemmsup_rd_zen_asm_6x2m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) consider_edge_cases: diff --git a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16n.c b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16n.c index dfe5ca28a..6b84594e3 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16n.c +++ b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_s6x16n.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -594,6 +594,10 @@ void bli_sgemmsup_rd_zen_asm_6x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -1061,6 +1065,10 @@ void bli_sgemmsup_rd_zen_asm_3x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -1471,6 +1479,10 @@ void bli_sgemmsup_rd_zen_asm_2x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm7", "ymm8", + "ymm10", "ymm11", "ymm13", "ymm14", + "ymm15", "memory" ) @@ -1828,6 +1840,8 @@ void bli_sgemmsup_rd_zen_asm_1x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", "ymm4", + "ymm7", "ymm10", "ymm13", "memory" ) diff --git a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4.c b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4.c index bdbdfd045..d07ee3ec0 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4.c +++ b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4.c @@ -3,7 +3,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 , Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -396,6 +396,9 @@ void bli_zgemmsup_rd_zen_asm_2x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm7", "ymm8", + "ymm10", "ymm11", "ymm13", "ymm14", "memory" ) @@ -686,6 +689,8 @@ void bli_zgemmsup_rd_zen_asm_1x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm7", "ymm10", "ymm13", "memory" ) @@ -1025,6 +1030,9 @@ void bli_zgemmsup_rd_zen_asm_2x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm7", "ymm8", + "ymm10", "ymm11", "ymm13", "ymm14", "memory" ) @@ -1301,6 +1309,8 @@ void bli_zgemmsup_rd_zen_asm_1x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm7", "ymm10", "ymm13", "memory" ) diff --git a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4m.c b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4m.c index 9cf359af0..b8243a04e 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4m.c +++ b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4m.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 , Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -605,6 +605,10 @@ void bli_zgemmsup_rd_zen_asm_3x4m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -1060,6 +1064,10 @@ void bli_zgemmsup_rd_zen_asm_3x2m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) diff --git a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4n.c b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4n.c index 6d628027d..8223e756f 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4n.c +++ b/kernels/zen/3/sup/bli_gemmsup_rd_zen_asm_z3x4n.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 , Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -587,6 +587,10 @@ void bli_zgemmsup_rd_zen_asm_3x4n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -1030,6 +1034,9 @@ void bli_zgemmsup_rd_zen_asm_2x4n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm7", "ymm8", + "ymm10", "ymm11", "ymm13", "ymm14", "memory" ) diff --git a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_c3x8.c b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_c3x8.c index 03c1627f1..386c2ca8f 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_c3x8.c +++ b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_c3x8.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -548,6 +548,9 @@ void bli_cgemmsup_rv_zen_asm_2x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", "memory" ) } @@ -910,6 +913,8 @@ void bli_cgemmsup_rv_zen_asm_1x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", "memory" ) } @@ -1286,6 +1291,8 @@ void bli_cgemmsup_rv_zen_asm_2x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", "memory" ) } @@ -1604,6 +1611,8 @@ void bli_cgemmsup_rv_zen_asm_1x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", "memory" ) } diff --git a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_c3x8m.c b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_c3x8m.c index 07fbd2629..f92b1cc17 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_c3x8m.c +++ b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_c3x8m.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020, Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -739,6 +739,10 @@ void bli_cgemmsup_rv_zen_asm_3x8m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -1230,6 +1234,9 @@ void bli_cgemmsup_rv_zen_asm_3x4m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", + "ymm12", "ymm14", "memory" ) diff --git a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c index 7befbb69b..2cb3a844c 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c +++ b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020-2022, Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -717,12 +717,16 @@ void bli_sgemmsup_rv_zen_asm_5x16 [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list - "rax", "rbx", "rcx", "rdx", "rsi", "rdi", + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -1213,12 +1217,16 @@ void bli_sgemmsup_rv_zen_asm_4x16 [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list - "rax", "rbx", "rcx", "rdx", "rsi", "rdi", + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -1779,6 +1787,10 @@ void bli_sgemmsup_rv_zen_asm_3x16 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -2172,6 +2184,10 @@ void bli_sgemmsup_rv_zen_asm_2x16 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -2533,6 +2549,10 @@ void bli_sgemmsup_rv_zen_asm_1x16 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -2981,6 +3001,10 @@ void bli_sgemmsup_rv_zen_asm_6x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -3434,6 +3458,10 @@ void bli_sgemmsup_rv_zen_asm_5x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -3800,6 +3828,10 @@ void bli_sgemmsup_rv_zen_asm_4x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -4195,6 +4227,10 @@ void bli_sgemmsup_rv_zen_asm_3x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -4504,6 +4540,10 @@ void bli_sgemmsup_rv_zen_asm_2x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -4767,6 +4807,10 @@ void bli_sgemmsup_rv_zen_asm_1x8 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -5168,6 +5212,10 @@ void bli_sgemmsup_rv_zen_asm_6x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -5556,6 +5604,10 @@ void bli_sgemmsup_rv_zen_asm_5x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -5894,6 +5946,9 @@ void bli_sgemmsup_rv_zen_asm_4x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -6219,6 +6274,9 @@ void bli_sgemmsup_rv_zen_asm_3x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -6492,6 +6550,9 @@ void bli_sgemmsup_rv_zen_asm_2x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -6746,6 +6807,10 @@ void bli_sgemmsup_rv_zen_asm_1x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm3", "ymm4", "ymm5", + "ymm6", "ymm7", "ymm8", "ymm9", + "ymm10", "ymm11", "ymm12", "ymm13", + "ymm14", "ymm15", "memory" ) } @@ -7133,6 +7198,9 @@ void bli_sgemmsup_rv_zen_asm_6x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -7506,6 +7574,9 @@ void bli_sgemmsup_rv_zen_asm_5x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -7842,6 +7913,10 @@ void bli_sgemmsup_rv_zen_asm_4x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -8144,6 +8219,10 @@ void bli_sgemmsup_rv_zen_asm_3x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -8406,6 +8485,10 @@ void bli_sgemmsup_rv_zen_asm_2x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } @@ -8643,6 +8726,10 @@ void bli_sgemmsup_rv_zen_asm_1x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm2","ymm4", "ymm5", + "ymm6", "ymm7", "ymm8", "ymm9", + "ymm10", "ymm11", "ymm12", "ymm13", + "ymm14", "ymm15", "memory" ) } diff --git a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16m.c b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16m.c index d5e2135a6..19acd5a1b 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16m.c +++ b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16m.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020-2022, Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -895,6 +895,10 @@ void bli_sgemmsup_rv_zen_asm_6x16m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -1439,6 +1443,9 @@ void bli_sgemmsup_rv_zen_asm_6x8m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", + "ymm12", "ymm14", "memory" ) diff --git a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16n.c b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16n.c index f46244d66..eb690e9f6 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16n.c +++ b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_s6x16n.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2021, Advanced Micro Devices, Inc. + Copyright (C) 2021-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -855,6 +855,10 @@ void bli_sgemmsup_rv_zen_asm_6x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -1621,6 +1625,10 @@ void bli_sgemmsup_rv_zen_asm_5x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "memory" ) @@ -2230,6 +2238,9 @@ void bli_sgemmsup_rv_zen_asm_4x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", "memory" ) @@ -2876,6 +2887,10 @@ void bli_sgemmsup_rv_zen_asm_3x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -3366,6 +3381,9 @@ void bli_sgemmsup_rv_zen_asm_2x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm11", "ymm12", "memory" ) @@ -3821,6 +3839,8 @@ void bli_sgemmsup_rv_zen_asm_1x16n "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "memory" ) diff --git a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_z3x4.c b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_z3x4.c index 787d3f772..298ede720 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_z3x4.c +++ b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_z3x4.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020-2021, Advanced Micro Devices, Inc. + Copyright (C) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -515,6 +515,9 @@ void bli_zgemmsup_rv_zen_asm_2x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", "memory" ) @@ -875,6 +878,8 @@ void bli_zgemmsup_rv_zen_asm_1x4 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", "memory" ) @@ -1236,6 +1241,8 @@ void bli_zgemmsup_rv_zen_asm_2x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", "memory" ) } @@ -1543,6 +1550,8 @@ void bli_zgemmsup_rv_zen_asm_1x2 "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "memory" ) diff --git a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_z3x4m.c b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_z3x4m.c index 1c3c386a0..804e196e1 100644 --- a/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_z3x4m.c +++ b/kernels/zen/3/sup/bli_gemmsup_rv_zen_asm_z3x4m.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020 - 2021, Advanced Micro Devices, Inc.All rights reserved. + Copyright (C) 2020 - 2023, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -798,6 +798,10 @@ void bli_zgemmsup_rv_zen_asm_3x4m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", + "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) @@ -1260,6 +1264,9 @@ void bli_zgemmsup_rv_zen_asm_3x2m "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "ymm0", "ymm1", "ymm2", "ymm3", + "ymm4", "ymm6", "ymm8", "ymm10", + "ymm12", "ymm14", "memory" ) diff --git a/kernels/zen4/3/bli_zgemm_zen4_asm_12x4.c b/kernels/zen4/3/bli_zgemm_zen4_asm_12x4.c index 795f61849..4fc69acd1 100644 --- a/kernels/zen4/3/bli_zgemm_zen4_asm_12x4.c +++ b/kernels/zen4/3/bli_zgemm_zen4_asm_12x4.c @@ -1071,12 +1071,15 @@ void bli_zgemm_zen4_asm_12x4( [beta_mul_type] "m"(beta_mul_type) : // register clobber list "rax", "rbx", "rcx", "rdi", "rsi", "r9", "r10", "r12", "r14", - "xmm8", "xmm9", "xmm10", "zmm0", "zmm1", "zmm2", + "xmm8", "xmm9", "xmm10", + "ymm8", "ymm9", + "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", - "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", - "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory") + "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", + "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" + ) AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_7); }