diff --git a/config/a64fx/bli_family_a64fx.h b/config/a64fx/bli_family_a64fx.h index 5e3f29fd4..b67ae7c60 100644 --- a/config/a64fx/bli_family_a64fx.h +++ b/config/a64fx/bli_family_a64fx.h @@ -41,6 +41,16 @@ #define BLIS_SIMD_ALIGN_SIZE 256 #define BLIS_SIMD_NUM_REGISTERS 32 +// SVE-specific configs. +#define N_L1_SVE_DEFAULT 64 +#define W_L1_SVE_DEFAULT 4 +#define C_L1_SVE_DEFAULT 256 +#define N_L2_SVE_DEFAULT 2048 +#define W_L2_SVE_DEFAULT 16 +#define C_L2_SVE_DEFAULT 256 +#define N_L3_SVE_DEFAULT 8192 +#define W_L3_SVE_DEFAULT 16 +#define C_L3_SVE_DEFAULT 256 //#endif diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c index 60a64515f..0327f6dbc 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_c2vx10_unindexed.c @@ -140,7 +140,7 @@ GEMM_ACOLCMPLX_CONTIGUOUS_LOAD_FWD(z28,z29,p0,%0,x2) " prfm PLDL1KEEP, [x16] \n\t" " add x16, x16, %4 \n\t" " prfm PLDL1KEEP, [x16] \n\t" -// " END_CCOL_PRFM: \n\t" +" END_CCOL_PRFM: \n\t" " \n\t" CLEAR_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19) " \n\t" diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c index 7136104b5..e92eba9d6 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_d2vx10_unindexed.c @@ -139,7 +139,7 @@ GEMM_ACOL_CONTIGUOUS_LOAD(z28,z29,p0,p0,x0) " prfm PLDL1KEEP, [x16] \n\t" " add x16, x16, x7 \n\t" " prfm PLDL1KEEP, [x16] \n\t" -// " END_CCOL_PRFM: \n\t" +" END_CCOL_PRFM: \n\t" " \n\t" CLEAR_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19) " \n\t" diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c index 20841891b..deb01f9fe 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_s2vx10_unindexed.c @@ -139,7 +139,7 @@ GEMM_ACOL_CONTIGUOUS_LOAD(z28,z29,p0,p0,x0) " prfm PLDL1STRM, [x16] \n\t" " add x16, x16, x7 \n\t" " prfm PLDL1STRM, [x16] \n\t" -// " END_CCOL_PRFM: \n\t" +" END_CCOL_PRFM: \n\t" " \n\t" CLEAR_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19) " \n\t" diff --git a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c index 7e630894f..e941f5abd 100644 --- a/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c +++ b/kernels/armsve/3/bli_gemm_armsve_asm_z2vx10_unindexed.c @@ -140,7 +140,7 @@ GEMM_ACOLCMPLX_CONTIGUOUS_LOAD_FWD(z28,z29,p0,%0,x2) " prfm PLDL1KEEP, [x16] \n\t" " add x16, x16, %4 \n\t" " prfm PLDL1KEEP, [x16] \n\t" -// " END_CCOL_PRFM: \n\t" +" END_CCOL_PRFM: \n\t" " \n\t" CLEAR_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z19) " \n\t"