From 9cc897f37455d52fbba752e3801f1a9d4a5bfdc1 Mon Sep 17 00:00:00 2001 From: Ruqing Xu Date: Thu, 3 Feb 2022 16:40:02 +0000 Subject: [PATCH] Fix SVE Compil. --- config/a64fx/bli_family_a64fx.h | 10 ++++++++++ .../armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c | 2 +- .../armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c | 2 +- .../armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c | 2 +- .../armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c | 2 +- 5 files changed, 14 insertions(+), 4 deletions(-) diff --git a/config/a64fx/bli_family_a64fx.h b/config/a64fx/bli_family_a64fx.h index 5e3f29fd4..b67ae7c60 100644 --- a/config/a64fx/bli_family_a64fx.h +++ b/config/a64fx/bli_family_a64fx.h @@ -41,6 +41,16 @@ #define BLIS_SIMD_ALIGN_SIZE 256 #define BLIS_SIMD_NUM_REGISTERS 32 +// SVE-specific configs. +#define N_L1_SVE_DEFAULT 64 +#define W_L1_SVE_DEFAULT 4 +#define C_L1_SVE_DEFAULT 256 +#define N_L2_SVE_DEFAULT 2048 +#define W_L2_SVE_DEFAULT 16 +#define C_L2_SVE_DEFAULT 256 +#define N_L3_SVE_DEFAULT 8192 +#define W_L3_SVE_DEFAULT 16 +#define C_L3_SVE_DEFAULT 256 //#endif diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c index 60a64515f..0327f6dbc 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c @@ -140,7 +140,7 @@ GEMM_ACOLCMPLX_CONTIGUOUS_LOAD_FWD(z28,z29,p0,%0,x2) " prfm PLDL1KEEP, [x16] \n\t" " add x16, x16, %4 \n\t" " prfm PLDL1KEEP, [x16] \n\t" -// " END_CCOL_PRFM: \n\t" +" END_CCOL_PRFM: \n\t" " \n\t" CLEAR_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19) " \n\t" diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c index 7136104b5..e92eba9d6 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c @@ -139,7 +139,7 @@ GEMM_ACOL_CONTIGUOUS_LOAD(z28,z29,p0,p0,x0) " prfm PLDL1KEEP, [x16] \n\t" " add x16, x16, x7 \n\t" " prfm PLDL1KEEP, [x16] \n\t" -// " END_CCOL_PRFM: \n\t" +" END_CCOL_PRFM: \n\t" " \n\t" CLEAR_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19) " \n\t" diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c index 20841891b..deb01f9fe 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c @@ -139,7 +139,7 @@ GEMM_ACOL_CONTIGUOUS_LOAD(z28,z29,p0,p0,x0) " prfm PLDL1STRM, [x16] \n\t" " add x16, x16, x7 \n\t" " prfm PLDL1STRM, [x16] \n\t" -// " END_CCOL_PRFM: \n\t" +" END_CCOL_PRFM: \n\t" " \n\t" CLEAR_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19) " \n\t" diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c index 7e630894f..e941f5abd 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c @@ -140,7 +140,7 @@ GEMM_ACOLCMPLX_CONTIGUOUS_LOAD_FWD(z28,z29,p0,%0,x2) " prfm PLDL1KEEP, [x16] \n\t" " add x16, x16, %4 \n\t" " prfm PLDL1KEEP, [x16] \n\t" -// " END_CCOL_PRFM: \n\t" +" END_CCOL_PRFM: \n\t" " \n\t" CLEAR_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19) " \n\t"