Merge pull request #543 from xrq-phys/armsve-packm-fix

ARMSVE Block SVE-Intrinsic Kernels for GCC 8-9
This commit is contained in:
Devin Matthews
2021-10-09 15:53:54 -05:00
committed by GitHub
5 changed files with 18 additions and 17 deletions

View File

@@ -60,9 +60,10 @@ void bli_cntx_init_a64fx( cntx_t* cntx )
// Set SVE-512 packing routine.
bli_cntx_set_packm_kers
(
3,
2,
BLIS_PACKM_10XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_10xk,
BLIS_PACKM_12XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_12xk,
// 12xk is not used and disabled for GCC 8-9 compatibility.
// BLIS_PACKM_12XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_int_12xk,
BLIS_PACKM_16XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_16xk,
cntx
);

View File

@@ -74,9 +74,8 @@ void bli_cntx_init_armsve( cntx_t* cntx )
if (m_r_d==16)
bli_cntx_set_packm_kers
(
3,
2,
BLIS_PACKM_10XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_10xk,
BLIS_PACKM_12XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_12xk,
BLIS_PACKM_16XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_16xk,
cntx
);
@@ -84,7 +83,7 @@ void bli_cntx_init_armsve( cntx_t* cntx )
bli_cntx_set_packm_kers
(
1,
BLIS_PACKM_8XK_KER, BLIS_DOUBLE, bli_dpackm_armsve256_asm_8xk,
BLIS_PACKM_8XK_KER, BLIS_DOUBLE, bli_dpackm_armsve256_int_8xk,
cntx
);

View File

@@ -35,17 +35,14 @@
#include "blis.h"
#ifdef __ARM_FEATURE_SVE
#if (defined(BLIS_FAMILY_ARMSVE) && !defined(BLIS_FAMILY_A64FX))
#include <arm_sve.h>
#else
#error "No Arm SVE intrinsics support in compiler"
#endif // __ARM_FEATURE_SVE
// assumption:
// SVE vector length = 256 bits.
//
void bli_dpackm_armsve256_asm_8xk
void bli_dpackm_armsve256_int_8xk
(
conj_t conja,
pack_t schema,
@@ -230,3 +227,5 @@ void bli_dpackm_armsve256_asm_8xk
);
}
}
#endif // BLIS_FAMILY_ARMSVE && !BLIS_FAMILY_A64FX

View File

@@ -36,11 +36,8 @@
#include "blis.h"
#include <stdio.h>
#ifdef __ARM_FEATURE_SVE
#if (defined(BLIS_FAMILY_ARMSVE) && !defined(BLIS_FAMILY_A64FX))
#include <arm_sve.h>
#else
#error "No Arm SVE intrinsics support in compiler"
#endif // __ARM_FEATURE_SVE
// assumption:
// SVE vector length = 512 bits.
@@ -48,7 +45,7 @@
// Packs 2 rows into 3 vectors; a predicate is used only when the number of rows to be packed is odd.
// prefetching is needed.
void bli_dpackm_armsve512_asm_12xk
void bli_dpackm_armsve512_int_12xk
(
conj_t conja,
pack_t schema,
@@ -357,3 +354,5 @@ void bli_dpackm_armsve512_asm_12xk
);
}
}
#endif // BLIS_FAMILY_ARMSVE && !BLIS_FAMILY_A64FX

View File

@@ -43,7 +43,10 @@ GEMM_UKR_PROT( dcomplex, z, gemm_armsve_asm_2vx7_unindexed )
//GEMMSUP_KER_PROT( double, d, gemmsup_cv_armsve_2vx10_unindexed )
//GEMMSUP_KER_PROT( double, d, gemmsup_rv_armsve_10x2v_unindexed )
PACKM_KER_PROT( double, d, packm_armsve256_asm_8xk )
// Declare the SVE-intrinsic kernels only when building the armsve family and not a64fx.
#if (defined(BLIS_FAMILY_ARMSVE) && !defined(BLIS_FAMILY_A64FX))
PACKM_KER_PROT( double, d, packm_armsve256_int_8xk )
PACKM_KER_PROT( double, d, packm_armsve512_int_12xk )
#endif
PACKM_KER_PROT( double, d, packm_armsve512_asm_16xk )
PACKM_KER_PROT( double, d, packm_armsve512_asm_12xk )
PACKM_KER_PROT( double, d, packm_armsve512_asm_10xk )