mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Miscellaneous changes
- Change begin_asm and end_asm occurrences in comments and unused code in files
kernels/haswell/3/sup/s6x16/bli_gemmsup_rv_haswell_asm_sMx6.c
kernels/zen4/3/sup/bli_gemmsup_cd_zen4_asm_z12x4m.c
to avoid problems in the clobber-checking script.
- Add missing register clobbers to the inline-asm clobber lists in files
kernels/zen4/1m/bli_packm_zen4_asm_d24xk.c
kernels/zen4/1m/bli_packm_zen4_asm_z12xk.c
kernels/zen4/3/sup/bli_gemmsup_cv_zen4_asm_z12x4m.c
- Add missing newline at end of files.
- Update some copyright years for recent changes.
- Standardize license text formatting.
AMD-Internal: [CPUPL-6579]
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
||||
@@ -3052,7 +3052,7 @@ void bli_sgemmsup_rv_haswell_asm_1x6
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
begin_asm()
|
||||
begin _ asm()
|
||||
|
||||
vzeroall() // zero all xmm/ymm registers.
|
||||
|
||||
@@ -3377,7 +3377,7 @@ void bli_sgemmsup_rv_haswell_asm_1x6
|
||||
|
||||
|
||||
|
||||
end_asm(
|
||||
end _ asm(
|
||||
: // output operands (none)
|
||||
: // input operands
|
||||
[k_iter] "m" (k_iter),
|
||||
|
||||
@@ -9,14 +9,14 @@
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
@@ -747,4 +747,4 @@ void cvt_bf16_f32(
|
||||
cvt_bf16_f32_col_major( cvt_buffer, a, rs_a, cs_a, MC, KC, rs_p, cs_p );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -464,4 +464,4 @@ void packa_mr8_f32f32f32of32_col_major
|
||||
*cs_p = 1;
|
||||
}
|
||||
|
||||
#endif // BLIS_ADDON_LPGEMM
|
||||
#endif // BLIS_ADDON_LPGEMM
|
||||
|
||||
@@ -9,14 +9,14 @@
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
@@ -625,4 +625,4 @@ void unpackb_nr64_bf16_f32
|
||||
unpackb_nr64_bf16_f32_row_major( b, unpack_b_buffer, NC, KC, rs_b );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -51,4 +51,4 @@
|
||||
ex_out = ( __m128i )_mm_add_ps( ( __m128 )ex_out, _mm_set1_ps( 1 ) ); \
|
||||
in_reg = _mm_div_ps( _mm_set1_ps ( 1 ), ( __m128 )ex_out ); \
|
||||
|
||||
#endif // AOCL_LPGEMM_SIGMOID_AVX2_H
|
||||
#endif // AOCL_LPGEMM_SIGMOID_AVX2_H
|
||||
|
||||
@@ -1714,7 +1714,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -2251,7 +2252,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -2784,7 +2786,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -3314,7 +3317,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -3841,7 +3845,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -4364,7 +4369,9 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
|
||||
"xmm6", "xmm8", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -4885,7 +4892,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -5809,7 +5817,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -6208,7 +6217,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -6604,7 +6614,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -6997,7 +7008,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -7387,7 +7399,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -7774,7 +7787,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -8157,7 +8171,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -8773,7 +8788,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -9029,7 +9045,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -9283,7 +9300,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -9532,7 +9550,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -9780,7 +9799,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -10024,7 +10044,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -10264,7 +10285,8 @@ void bli_dpackm_zen4_asm_24xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"zmm16", "zmm17", "zmm18", "zmm20", "zmm30", "zmm31", "k2", "memory"
|
||||
"zmm16", "zmm17", "zmm18", "zmm20",
|
||||
"zmm30", "zmm31", "k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -501,7 +501,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -688,7 +688,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -875,7 +875,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -1059,7 +1059,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -1272,7 +1272,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -1441,7 +1441,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -1608,7 +1608,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -1773,7 +1773,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -1954,7 +1954,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -2100,7 +2100,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -2244,7 +2244,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
@@ -2385,7 +2385,7 @@ void bli_zpackm_zen4_asm_12xk
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
"zmm12", "zmm13", "zmm14", "zmm15",
|
||||
"memory"
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -469,7 +469,7 @@ void bli_zgemmsup_cd_zen4_asm_12x4m
|
||||
|
||||
/*
|
||||
Register clobber list and usage(for GPRs) :
|
||||
BEGIN_ASM
|
||||
BEGIN _ ASM
|
||||
|
||||
R10 - Base addr of A
|
||||
RDX - Base addr of B
|
||||
@@ -498,7 +498,7 @@ void bli_zgemmsup_cd_zen4_asm_12x4m
|
||||
R12 - Offset by 2 cols using cs_c
|
||||
R10 - Copy base addr of A
|
||||
R15 - +2, !=4
|
||||
END_ASM
|
||||
END _ ASM
|
||||
*/
|
||||
|
||||
// Assembly code-section
|
||||
@@ -3401,4 +3401,4 @@ void bli_zgemmsup_cd_zen4_asm_2x2
|
||||
"zmm28", "zmm29", "zmm30", "zmm31",
|
||||
"k3", "k4", "memory"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7805,4 +7805,4 @@ void bli_cgemmsup_cv_zen4_asm_fx1
|
||||
"zmm28", "zmm29", "zmm30", "zmm31",
|
||||
"k2", "k3", "memory"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7064,6 +7064,9 @@ void bli_zgemmsup_cv_zen4_asm_12x4m
|
||||
[cs_c] "m" (cs_c)
|
||||
: // register clobber list
|
||||
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "al",
|
||||
"ymm0", "ymm1", "ymm2", "ymm3", "ymm5",
|
||||
"ymm6", "ymm7", "ymm8", "ymm9",
|
||||
"ymm10", "ymm11", "ymm12", "ymm15",
|
||||
"zmm0", "zmm1", "zmm2", "zmm3",
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
@@ -7072,7 +7075,7 @@ void bli_zgemmsup_cv_zen4_asm_12x4m
|
||||
"zmm20", "zmm21", "zmm22", "zmm23",
|
||||
"zmm24", "zmm25", "zmm26", "zmm27",
|
||||
"zmm28", "zmm29", "zmm30", "zmm31",
|
||||
"memory"
|
||||
"k2", "memory"
|
||||
)
|
||||
}
|
||||
|
||||
@@ -7529,6 +7532,11 @@ void bli_zgemmsup_cv_zen4_asm_12x3m
|
||||
[cs_c] "m" (cs_c)
|
||||
: // register clobber list
|
||||
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "al",
|
||||
"xmm9", "xmm10", "xmm11", "xmm12",
|
||||
"ymm0", "ymm1", "ymm2", "ymm3",
|
||||
"ymm5", "ymm6", "ymm7", "ymm8",
|
||||
"ymm9", "ymm10", "ymm11", "ymm12",
|
||||
"ymm13", "ymm14", "ymm15",
|
||||
"zmm0", "zmm1", "zmm2", "zmm3",
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
@@ -7938,6 +7946,8 @@ void bli_zgemmsup_cv_zen4_asm_12x2m
|
||||
[cs_c] "m" (cs_c)
|
||||
: // register clobber list
|
||||
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "al",
|
||||
"ymm0", "ymm1", "ymm2", "ymm3", "ymm5",
|
||||
"ymm6","ymm7", "ymm8", "ymm15",
|
||||
"zmm0", "zmm1", "zmm2", "zmm3",
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
@@ -8332,6 +8342,9 @@ void bli_zgemmsup_cv_zen4_asm_12x1m
|
||||
[cs_c] "m" (cs_c)
|
||||
: // register clobber list
|
||||
"rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "al",
|
||||
"xmm5", "xmm6", "xmm14", "xmm15",
|
||||
"ymm0", "ymm1", "ymm2", "ymm3",
|
||||
"ymm5", "ymm6", "ymm14", "ymm15",
|
||||
"zmm0", "zmm1", "zmm2", "zmm3",
|
||||
"zmm4", "zmm5", "zmm6", "zmm7",
|
||||
"zmm8", "zmm9", "zmm10", "zmm11",
|
||||
|
||||
@@ -992,4 +992,4 @@ scr = ( __m512)( _mm512_sllv_epi32 \
|
||||
\
|
||||
TANHF_AVX512(x_tanh, r, r2, x, z, dn, q)
|
||||
|
||||
#endif //LPGEMM_F32_SGEMM_KERN_MACROS_H
|
||||
#endif //LPGEMM_F32_SGEMM_KERN_MACROS_H
|
||||
|
||||
@@ -1460,4 +1460,4 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_256_6x64m)
|
||||
}
|
||||
}
|
||||
|
||||
#endif // BLIS_ADDON_LPGEMM
|
||||
#endif // BLIS_ADDON_LPGEMM
|
||||
|
||||
@@ -113,4 +113,4 @@ void bli_dcopyv_zen5_asm
|
||||
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_2)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user