mirror of
https://github.com/amd/blis.git
synced 2026-05-13 10:35:38 +00:00
Arm SVE Config armsve Use ZGEMM/CGEMM
This commit is contained in:
@@ -89,4 +89,6 @@ void PASTEMAC(ch, _blksz_armsve) (dim_t *m_r_, dim_t *n_r_, \
|
||||
|
||||
EXPANDMAC_BLKSZ_ARMSVE( s, 4 )
|
||||
EXPANDMAC_BLKSZ_ARMSVE( d, 8 )
|
||||
EXPANDMAC_BLKSZ_ARMSVE( c, 8 )
|
||||
EXPANDMAC_BLKSZ_ARMSVE( z, 16 )
|
||||
|
||||
|
||||
@@ -39,4 +39,6 @@ dim_t bli_vl_bits_armsve(void);
|
||||
|
||||
void bli_s_blksz_armsve(dim_t *m_r_, dim_t *n_r_, dim_t *k_c_, dim_t *m_c_, dim_t *n_c_);
|
||||
void bli_d_blksz_armsve(dim_t *m_r_, dim_t *n_r_, dim_t *k_c_, dim_t *m_c_, dim_t *n_c_);
|
||||
void bli_c_blksz_armsve(dim_t *m_r_, dim_t *n_r_, dim_t *k_c_, dim_t *m_c_, dim_t *n_c_);
|
||||
void bli_z_blksz_armsve(dim_t *m_r_, dim_t *n_r_, dim_t *k_c_, dim_t *m_c_, dim_t *n_c_);
|
||||
|
||||
|
||||
@@ -50,17 +50,23 @@ void bli_cntx_init_armsve( cntx_t* cntx )
|
||||
// Block size.
|
||||
dim_t m_r_s, n_r_s, k_c_s, m_c_s, n_c_s;
|
||||
dim_t m_r_d, n_r_d, k_c_d, m_c_d, n_c_d;
|
||||
dim_t m_r_c, n_r_c, k_c_c, m_c_c, n_c_c;
|
||||
dim_t m_r_z, n_r_z, k_c_z, m_c_z, n_c_z;
|
||||
bli_s_blksz_armsve(&m_r_s, &n_r_s, &k_c_s, &m_c_s, &n_c_s);
|
||||
bli_d_blksz_armsve(&m_r_d, &n_r_d, &k_c_d, &m_c_d, &n_c_d);
|
||||
bli_c_blksz_armsve(&m_r_c, &n_r_c, &k_c_c, &m_c_c, &n_c_c);
|
||||
bli_z_blksz_armsve(&m_r_z, &n_r_z, &k_c_z, &m_c_z, &n_c_z);
|
||||
|
||||
// Update the context with optimized native gemm micro-kernels and
|
||||
// their storage preferences.
|
||||
bli_cntx_set_l3_nat_ukrs
|
||||
(
|
||||
2,
|
||||
4,
|
||||
// These are vector-length agnostic kernels. Yet knowing mr is required at runtime.
|
||||
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armsve_asm_2vx10_unindexed, FALSE,
|
||||
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armsve_asm_2vx10_unindexed, FALSE,
|
||||
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armsve_asm_2vx10_unindexed, FALSE,
|
||||
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armsve_asm_2vx10_unindexed, FALSE,
|
||||
BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_armsve_asm_2vx10_unindexed, FALSE,
|
||||
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_armsve_asm_2vx10_unindexed, FALSE,
|
||||
cntx
|
||||
);
|
||||
|
||||
@@ -84,11 +90,11 @@ void bli_cntx_init_armsve( cntx_t* cntx )
|
||||
|
||||
// Initialize level-3 blocksize objects with architecture-specific values.
|
||||
// s d c z
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MR ], m_r_s, m_r_d, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], n_r_s, n_r_d, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], m_c_s, m_c_d, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], k_c_s, k_c_d, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], n_c_s, n_c_d, -1, -1 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MR ], m_r_s, m_r_d, m_r_c, m_r_z );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], n_r_s, n_r_d, n_r_c, n_r_z );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], m_c_s, m_c_d, m_c_c, m_c_z );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], k_c_s, k_c_d, k_c_c, k_c_z );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], n_c_s, n_c_d, n_c_c, n_c_z );
|
||||
|
||||
// Update the context with the current architecture's register and cache
|
||||
// blocksizes (and multiples) for native execution.
|
||||
|
||||
Reference in New Issue
Block a user