mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
Merge pull request #543 from xrq-phys/armsve-packm-fix
ARMSVE Block SVE-Intrinsic Kernels for GCC 8-9
This commit is contained in:
@@ -60,9 +60,10 @@ void bli_cntx_init_a64fx( cntx_t* cntx )
|
||||
// Set SVE-512 packing routine.
|
||||
bli_cntx_set_packm_kers
|
||||
(
|
||||
3,
|
||||
2,
|
||||
BLIS_PACKM_10XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_10xk,
|
||||
BLIS_PACKM_12XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_12xk,
|
||||
// 12xk is not used and disabled for GCC 8-9 compatibility.
|
||||
// BLIS_PACKM_12XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_int_12xk,
|
||||
BLIS_PACKM_16XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_16xk,
|
||||
cntx
|
||||
);
|
||||
|
||||
@@ -74,9 +74,8 @@ void bli_cntx_init_armsve( cntx_t* cntx )
|
||||
if (m_r_d==16)
|
||||
bli_cntx_set_packm_kers
|
||||
(
|
||||
3,
|
||||
2,
|
||||
BLIS_PACKM_10XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_10xk,
|
||||
BLIS_PACKM_12XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_12xk,
|
||||
BLIS_PACKM_16XK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_16xk,
|
||||
cntx
|
||||
);
|
||||
@@ -84,7 +83,7 @@ void bli_cntx_init_armsve( cntx_t* cntx )
|
||||
bli_cntx_set_packm_kers
|
||||
(
|
||||
1,
|
||||
BLIS_PACKM_8XK_KER, BLIS_DOUBLE, bli_dpackm_armsve256_asm_8xk,
|
||||
BLIS_PACKM_8XK_KER, BLIS_DOUBLE, bli_dpackm_armsve256_int_8xk,
|
||||
cntx
|
||||
);
|
||||
|
||||
|
||||
@@ -35,17 +35,14 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#ifdef __ARM_FEATURE_SVE
|
||||
#if (defined(BLIS_FAMILY_ARMSVE) && !defined(BLIS_FAMILY_A64FX))
|
||||
#include <arm_sve.h>
|
||||
#else
|
||||
#error "No Arm SVE intrinsics support in compiler"
|
||||
#endif // __ARM_FEATURE_SVE
|
||||
|
||||
// assumption:
|
||||
// SVE vector length = 256 bits.
|
||||
//
|
||||
|
||||
void bli_dpackm_armsve256_asm_8xk
|
||||
void bli_dpackm_armsve256_int_8xk
|
||||
(
|
||||
conj_t conja,
|
||||
pack_t schema,
|
||||
@@ -230,3 +227,5 @@ void bli_dpackm_armsve256_asm_8xk
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // defined(BLIS_FAMILY_ARMSVE) && !defined(BLIS_FAMILY_A64FX)
|
||||
@@ -36,11 +36,8 @@
|
||||
#include "blis.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __ARM_FEATURE_SVE
|
||||
#if (defined(BLIS_FAMILY_ARMSVE) && !defined(BLIS_FAMILY_A64FX))
|
||||
#include <arm_sve.h>
|
||||
#else
|
||||
#error "No Arm SVE intrinsics support in compiler"
|
||||
#endif // __ARM_FEATURE_SVE
|
||||
|
||||
// assumption:
|
||||
// SVE vector length = 512 bits.
|
||||
@@ -48,7 +45,7 @@
|
||||
// 2-rows -> 3 vectors packing; a predicate is used only when an odd number of rows is to be packed.
|
||||
// prefetching is needed.
|
||||
|
||||
void bli_dpackm_armsve512_asm_12xk
|
||||
void bli_dpackm_armsve512_int_12xk
|
||||
(
|
||||
conj_t conja,
|
||||
pack_t schema,
|
||||
@@ -357,3 +354,5 @@ void bli_dpackm_armsve512_asm_12xk
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // defined(BLIS_FAMILY_ARMSVE) && !defined(BLIS_FAMILY_A64FX)
|
||||
@@ -43,7 +43,10 @@ GEMM_UKR_PROT( dcomplex, z, gemm_armsve_asm_2vx7_unindexed )
|
||||
//GEMMSUP_KER_PROT( double, d, gemmsup_cv_armsve_2vx10_unindexed )
|
||||
//GEMMSUP_KER_PROT( double, d, gemmsup_rv_armsve_10x2v_unindexed )
|
||||
|
||||
PACKM_KER_PROT( double, d, packm_armsve256_asm_8xk )
|
||||
// Declare the SVE-intrinsic kernels only for configurations that reference them (ARMSVE family, not A64FX).
|
||||
#if (defined(BLIS_FAMILY_ARMSVE) && !defined(BLIS_FAMILY_A64FX))
|
||||
PACKM_KER_PROT( double, d, packm_armsve256_int_8xk )
|
||||
PACKM_KER_PROT( double, d, packm_armsve512_int_12xk )
|
||||
#endif
|
||||
PACKM_KER_PROT( double, d, packm_armsve512_asm_16xk )
|
||||
PACKM_KER_PROT( double, d, packm_armsve512_asm_12xk )
|
||||
PACKM_KER_PROT( double, d, packm_armsve512_asm_10xk )
|
||||
|
||||
Reference in New Issue
Block a user