From 08174a2f6ebbd8ed5aa2bc4edc45da80962f06bb Mon Sep 17 00:00:00 2001 From: RuQing Xu Date: Sat, 1 Jan 2022 21:35:19 +0900 Subject: [PATCH] Evict Requirement for SVE GEMM For 8<= GCC < 10 compatibility. --- config/armsve/bli_cntx_init_armsve.c | 1 - .../armsve/3/bli_armsve_utils.c | 6 +++--- .../armsve/3/bli_armsve_utils.h | 2 +- kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c | 3 +-- kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c | 3 +-- kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c | 3 +-- kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c | 3 +-- kernels/armsve/3/bli_gemm_armsve_asm_z2vx7_unindexed.c | 3 +-- kernels/armsve/3/bli_gemm_armsve_asm_z2vx8_unindexed.c | 4 +--- kernels/armsve/bli_kernels_armsve.h | 1 + 10 files changed, 11 insertions(+), 18 deletions(-) rename config/armsve/bli_armsve_config_utils.c => kernels/armsve/3/bli_armsve_utils.c (97%) rename config/armsve/bli_armsve_config_utils.h => kernels/armsve/3/bli_armsve_utils.h (98%) diff --git a/config/armsve/bli_cntx_init_armsve.c b/config/armsve/bli_cntx_init_armsve.c index fafed2229..cd07924a7 100644 --- a/config/armsve/bli_cntx_init_armsve.c +++ b/config/armsve/bli_cntx_init_armsve.c @@ -33,7 +33,6 @@ */ #include "blis.h" -#include "bli_armsve_config_utils.h" void bli_cntx_init_armsve( cntx_t* cntx ) { diff --git a/config/armsve/bli_armsve_config_utils.c b/kernels/armsve/3/bli_armsve_utils.c similarity index 97% rename from config/armsve/bli_armsve_config_utils.c rename to kernels/armsve/3/bli_armsve_utils.c index 70501e39d..1e3256d34 100644 --- a/config/armsve/bli_armsve_config_utils.c +++ b/kernels/armsve/3/bli_armsve_utils.c @@ -35,7 +35,7 @@ */ #include "blis.h" -dim_t bli_vl_bits_armsve(void) +dim_t bli_vl_bytes_armsve(void) { \ uint64_t vl = 0; __asm__ ( @@ -43,7 +43,7 @@ dim_t bli_vl_bits_armsve(void) " incb x0 \n\t" " mov %[vl], x0 \n\t" : [vl] "=r" (vl) - : + : : "x0" ); return vl; @@ -64,7 +64,7 @@ void PASTEMAC(ch, _blksz_armsve) (dim_t *m_r_, dim_t *n_r_, \ dim_t N_L3 = bli_env_get_var("BLIS_SVE_N_L3", N_L3_SVE_DEFAULT); \ dim_t C_L3 = bli_env_get_var("BLIS_SVE_C_L3", C_L3_SVE_DEFAULT); \ \ - dim_t vl_b = bli_vl_bits_armsve(); \ + dim_t vl_b = bli_vl_bytes_armsve(); \ dim_t vl = vl_b / S_Data; \ dim_t m_r = 2 * vl; \ dim_t n_r = 10; \ diff --git a/config/armsve/bli_armsve_config_utils.h b/kernels/armsve/3/bli_armsve_utils.h similarity index 98% rename from config/armsve/bli_armsve_config_utils.h rename to kernels/armsve/3/bli_armsve_utils.h index 87bba73ed..6d3aab05d 100644 --- a/config/armsve/bli_armsve_config_utils.h +++ b/kernels/armsve/3/bli_armsve_utils.h @@ -35,7 +35,7 @@ */ #include "blis.h" -dim_t bli_vl_bits_armsve(void); +dim_t bli_vl_bytes_armsve(void); void bli_s_blksz_armsve(dim_t *m_r_, dim_t *n_r_, dim_t *k_c_, dim_t *m_c_, dim_t *n_c_); void bli_d_blksz_armsve(dim_t *m_r_, dim_t *n_r_, dim_t *k_c_, dim_t *m_c_, dim_t *n_c_); diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c index 913abd1f6..c84a59f07 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c @@ -42,7 +42,6 @@ // 2vx10 microkernels. #include "armsve_asm_2vx10cmplx.h" -#include "arm_sve.h" void bli_cgemm_armsve_asm_2vx10_unindexed ( @@ -69,7 +68,7 @@ void bli_cgemm_armsve_asm_2vx10_unindexed uint64_t cs_c = cs_c0; uint64_t info = 0; - uint64_t mr = svcntw(); + uint64_t mr = bli_vl_bytes_armsve() * 2 / 8; GEMM_UKR_SETUP_CT( c, mr, 10, false ); __asm__ volatile ( diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c index 9730fb8ce..5a662df4e 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c @@ -42,7 +42,6 @@ // 2vx10 microkernels. #include "armsve_asm_2vx10.h" -#include "arm_sve.h" void bli_dgemm_armsve_asm_2vx10_unindexed ( @@ -68,7 +67,7 @@ void bli_dgemm_armsve_asm_2vx10_unindexed uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; - uint64_t mr = 2*svcntd(); + uint64_t mr = bli_vl_bytes_armsve() * 2 / 8; GEMM_UKR_SETUP_CT( d, mr, 10, false ); __asm__ volatile ( diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c index 74c4779d7..caa70a5e5 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c @@ -42,7 +42,6 @@ // 2vx10 microkernels. #include "armsve_asm_2vx10.h" -#include "arm_sve.h" void bli_sgemm_armsve_asm_2vx10_unindexed ( @@ -68,7 +67,7 @@ void bli_sgemm_armsve_asm_2vx10_unindexed uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; - uint64_t mr = 2*svcntw(); + uint64_t mr = bli_vl_bytes_armsve() * 2 / 4; GEMM_UKR_SETUP_CT( s, mr, 10, false ); __asm__ volatile ( diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c index ee041b3c4..25084af35 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c @@ -42,7 +42,6 @@ // 2vx10 microkernels. #include "armsve_asm_2vx10cmplx.h" -#include "arm_sve.h" void bli_zgemm_armsve_asm_2vx10_unindexed ( @@ -69,7 +68,7 @@ void bli_zgemm_armsve_asm_2vx10_unindexed uint64_t cs_c = cs_c0; uint64_t info = 0; - uint64_t mr = svcntd(); + uint64_t mr = bli_vl_bytes_armsve() * 2 / 16; GEMM_UKR_SETUP_CT( z, mr, 10, false ); __asm__ volatile ( diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx7_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx7_unindexed.c index 641944ecd..ca62f9db1 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx7_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx7_unindexed.c @@ -42,7 +42,6 @@ // 2vx7 microkernels. #include "armsve_asm_2vx7cmplx.h" -#include "arm_sve.h" void bli_zgemm_armsve_asm_2vx7_unindexed ( @@ -69,7 +68,7 @@ void bli_zgemm_armsve_asm_2vx7_unindexed uint64_t cs_c = cs_c0; uint64_t info = 0; - uint64_t mr = svcntd(); + uint64_t mr = bli_vl_bytes_armsve() * 2 / 16; GEMM_UKR_SETUP_CT( z, mr, 7, false ); __asm__ volatile ( diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx8_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx8_unindexed.c index 4272f72c0..4a910baac 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx8_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx8_unindexed.c @@ -42,8 +42,6 @@ // 2vx8 microkernels. #include "armsve_asm_2vx8cmplx.h" -#include "arm_sve.h" - void bli_zgemm_armsve_asm_2vx8_unindexed ( dim_t m, @@ -69,7 +67,7 @@ void bli_zgemm_armsve_asm_2vx8_unindexed uint64_t cs_c = cs_c0; uint64_t info = 0; - uint64_t mr = svcntd(); + uint64_t mr = bli_vl_bytes_armsve() * 2 / 16; GEMM_UKR_SETUP_CT( z, mr, 8, false ); __asm__ volatile ( diff --git a/kernels/armsve/bli_kernels_armsve.h b/kernels/armsve/bli_kernels_armsve.h index 0d5c5dc47..408300308 100644 --- a/kernels/armsve/bli_kernels_armsve.h +++ b/kernels/armsve/bli_kernels_armsve.h @@ -31,6 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "./3/bli_armsve_utils.h" GEMM_UKR_PROT( double, d, gemm_armsve256_asm_8x8 ) GEMM_UKR_PROT( double, d, gemm_armsve_asm_2vx10_unindexed )