Miscellaneous changes

- Rewrite the begin_asm and end_asm tokens that appear in comments and
  unused code in files
     kernels/haswell/3/sup/s6x16/bli_gemmsup_rv_haswell_asm_sMx6.c
     kernels/zen4/3/sup/bli_gemmsup_cd_zen4_asm_z12x4m.c
  to avoid problems in the clobber-checking script.
- Add missing registers to the inline-asm register clobber lists in files
     kernels/zen4/1m/bli_packm_zen4_asm_d24xk.c
     kernels/zen4/1m/bli_packm_zen4_asm_z12xk.c
     kernels/zen4/3/sup/bli_gemmsup_cv_zen4_asm_z12x4m.c
- Add missing newline at end of files.
- Update some copyright years for recent changes.
- Standardize license text formatting.

AMD-Internal: [CPUPL-6579]
This commit is contained in:
Smyth, Edward
2025-08-26 16:37:43 +01:00
committed by GitHub
parent 3cad637afa
commit fb2a682725
45 changed files with 158 additions and 125 deletions

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@@ -3052,7 +3052,7 @@ void bli_sgemmsup_rv_haswell_asm_1x6
// -------------------------------------------------------------------------
begin_asm()
begin _ asm()
vzeroall() // zero all xmm/ymm registers.
@@ -3377,7 +3377,7 @@ void bli_sgemmsup_rv_haswell_asm_1x6
end_asm(
end _ asm(
: // output operands (none)
: // input operands
[k_iter] "m" (k_iter),

View File

@@ -9,14 +9,14 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@@ -747,4 +747,4 @@ void cvt_bf16_f32(
cvt_bf16_f32_col_major( cvt_buffer, a, rs_a, cs_a, MC, KC, rs_p, cs_p );
}
}
#endif
#endif

View File

@@ -464,4 +464,4 @@ void packa_mr8_f32f32f32of32_col_major
*cs_p = 1;
}
#endif // BLIS_ADDON_LPGEMM
#endif // BLIS_ADDON_LPGEMM

View File

@@ -9,14 +9,14 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@@ -625,4 +625,4 @@ void unpackb_nr64_bf16_f32
unpackb_nr64_bf16_f32_row_major( b, unpack_b_buffer, NC, KC, rs_b );
}
}
#endif
#endif

View File

@@ -51,4 +51,4 @@
ex_out = ( __m128i )_mm_add_ps( ( __m128 )ex_out, _mm_set1_ps( 1 ) ); \
in_reg = _mm_div_ps( _mm_set1_ps ( 1 ), ( __m128 )ex_out ); \
#endif // AOCL_LPGEMM_SIGMOID_AVX2_H
#endif // AOCL_LPGEMM_SIGMOID_AVX2_H

View File

@@ -1714,7 +1714,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -2251,7 +2252,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -2784,7 +2786,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -3314,7 +3317,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -3841,7 +3845,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -4364,7 +4369,9 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
"xmm6", "xmm8", "k3", "memory"
)
break;
}
@@ -4885,7 +4892,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -5809,7 +5817,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -6208,7 +6217,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -6604,7 +6614,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -6997,7 +7008,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -7387,7 +7399,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -7774,7 +7787,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -8157,7 +8171,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -8773,7 +8788,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -9029,7 +9045,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -9283,7 +9300,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -9532,7 +9550,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -9780,7 +9799,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -10024,7 +10044,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}
@@ -10264,7 +10285,8 @@ void bli_dpackm_zen4_asm_24xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
"zmm16", "zmm17", "zmm18", "zmm20",
"zmm30", "zmm31", "k2", "k3", "memory"
)
break;
}

View File

@@ -501,7 +501,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "memory"
)
break;
}
@@ -688,7 +688,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}
@@ -875,7 +875,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}
@@ -1059,7 +1059,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}
@@ -1272,7 +1272,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "memory"
)
break;
}
@@ -1441,7 +1441,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}
@@ -1608,7 +1608,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}
@@ -1773,7 +1773,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}
@@ -1954,7 +1954,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "memory"
)
break;
}
@@ -2100,7 +2100,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}
@@ -2244,7 +2244,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}
@@ -2385,7 +2385,7 @@ void bli_zpackm_zen4_asm_12xk
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
"zmm12", "zmm13", "zmm14", "zmm15",
"memory"
"k2", "k3", "memory"
)
break;
}

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -469,7 +469,7 @@ void bli_zgemmsup_cd_zen4_asm_12x4m
/*
Register clobber list and usage(for GPRs) :
BEGIN_ASM
BEGIN _ ASM
R10 - Base addr of A
RDX - Base addr of B
@@ -498,7 +498,7 @@ void bli_zgemmsup_cd_zen4_asm_12x4m
R12 - Offset by 2 cols using cs_c
R10 - Copy base addr of A
R15 - +2, !=4
END_ASM
END _ ASM
*/
// Assembly code-section
@@ -3401,4 +3401,4 @@ void bli_zgemmsup_cd_zen4_asm_2x2
"zmm28", "zmm29", "zmm30", "zmm31",
"k3", "k4", "memory"
)
}
}

View File

@@ -7805,4 +7805,4 @@ void bli_cgemmsup_cv_zen4_asm_fx1
"zmm28", "zmm29", "zmm30", "zmm31",
"k2", "k3", "memory"
)
}
}

View File

@@ -7064,6 +7064,9 @@ void bli_zgemmsup_cv_zen4_asm_12x4m
[cs_c] "m" (cs_c)
: // register clobber list
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "al",
"ymm0", "ymm1", "ymm2", "ymm3", "ymm5",
"ymm6", "ymm7", "ymm8", "ymm9",
"ymm10", "ymm11", "ymm12", "ymm15",
"zmm0", "zmm1", "zmm2", "zmm3",
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
@@ -7072,7 +7075,7 @@ void bli_zgemmsup_cv_zen4_asm_12x4m
"zmm20", "zmm21", "zmm22", "zmm23",
"zmm24", "zmm25", "zmm26", "zmm27",
"zmm28", "zmm29", "zmm30", "zmm31",
"memory"
"k2", "memory"
)
}
@@ -7529,6 +7532,11 @@ void bli_zgemmsup_cv_zen4_asm_12x3m
[cs_c] "m" (cs_c)
: // register clobber list
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "al",
"xmm9", "xmm10", "xmm11", "xmm12",
"ymm0", "ymm1", "ymm2", "ymm3",
"ymm5", "ymm6", "ymm7", "ymm8",
"ymm9", "ymm10", "ymm11", "ymm12",
"ymm13", "ymm14", "ymm15",
"zmm0", "zmm1", "zmm2", "zmm3",
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
@@ -7938,6 +7946,8 @@ void bli_zgemmsup_cv_zen4_asm_12x2m
[cs_c] "m" (cs_c)
: // register clobber list
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "al",
"ymm0", "ymm1", "ymm2", "ymm3", "ymm5",
"ymm6","ymm7", "ymm8", "ymm15",
"zmm0", "zmm1", "zmm2", "zmm3",
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",
@@ -8332,6 +8342,9 @@ void bli_zgemmsup_cv_zen4_asm_12x1m
[cs_c] "m" (cs_c)
: // register clobber list
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "al",
"xmm5", "xmm6", "xmm14", "xmm15",
"ymm0", "ymm1", "ymm2", "ymm3",
"ymm5", "ymm6", "ymm14", "ymm15",
"zmm0", "zmm1", "zmm2", "zmm3",
"zmm4", "zmm5", "zmm6", "zmm7",
"zmm8", "zmm9", "zmm10", "zmm11",

View File

@@ -992,4 +992,4 @@ scr = ( __m512)( _mm512_sllv_epi32 \
\
TANHF_AVX512(x_tanh, r, r2, x, z, dn, q)
#endif //LPGEMM_F32_SGEMM_KERN_MACROS_H
#endif //LPGEMM_F32_SGEMM_KERN_MACROS_H

View File

@@ -1460,4 +1460,4 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_256_6x64m)
}
}
#endif // BLIS_ADDON_LPGEMM
#endif // BLIS_ADDON_LPGEMM

View File

@@ -113,4 +113,4 @@ void bli_dcopyv_zen5_asm
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
return;
}
}