mirror of
https://github.com/amd/blis.git
synced 2026-05-26 07:25:28 +00:00
Arm SVE ZGEMM 2Vx10 Unindex Process Alpha=1.0
This commit is contained in:
@@ -42,6 +42,12 @@
|
||||
// 2vx10 microkernels.
|
||||
#include "armsve_asm_2vx10cmplx.h"
|
||||
|
||||
// MOV_COL2: expands to inline-assembly text for four vector register moves,
// each copying one source register into one destination register using the
// ".d" (64-bit element) register arrangement:
//   ZD0Re <- Z0Re,  ZD0Im <- Z0Im,  ZD1Re <- Z1Re,  ZD1Im <- Z1Im
// Arguments are bare register names (e.g. z20); they are stringized and
// pasted into the instruction text, so no quoting is needed at the call site.
// NOTE(review): the Re/Im suffixes presumably denote the real and imaginary
// parts of two vectors of one column - confirm against the kernel's register
// layout.
// The expansion is a sequence of adjacent string literals and must stay one
// logical line: every line but the last ends in a backslash continuation.
#define MOV_COL2(ZD0Re,ZD0Im,ZD1Re,ZD1Im,Z0Re,Z0Im,Z1Re,Z1Im) \
" mov "#ZD0Re".d, "#Z0Re".d \n\t" \
" mov "#ZD0Im".d, "#Z0Im".d \n\t" \
" mov "#ZD1Re".d, "#Z1Re".d \n\t" \
" mov "#ZD1Im".d, "#Z1Im".d \n\t"
|
||||
|
||||
void bli_zgemm_armsve_asm_2vx10_unindexed
|
||||
(
|
||||
dim_t k0,
|
||||
@@ -205,14 +211,26 @@ GEMM_2VX10CMPLX_MKER_LOOP_PLAIN_C_1_RESIDUAL(z0,z2,z4,z6,z8,z10,z12,z14,z16,z18,
|
||||
" prfm PLDL1STRM, [%10, 256*1] \n\t"
|
||||
" \n\t"
|
||||
" WRITE_MEM: \n\t"
|
||||
" fmov d27, #1.0 \n\t"
|
||||
" fcmp d29, #0.0 \n\t" // Whether Imag(alpha) == 0.
|
||||
" fccmp d28, d27, 0, eq \n\t" // Whether Real(alpha) == 1.
|
||||
" b.eq UNIT_ALPHA \n\t"
|
||||
" \n\t"
|
||||
GEMM_FMULCMPLX_COL2(z20,z21,z22,z23,p0,z0 ,z1 ,z2 ,z3 ,z28,z29)
|
||||
GEMM_FMULCMPLX_COL2(z24,z25,z26,z27,p0,z4 ,z5 ,z6 ,z7 ,z28,z29)
|
||||
GEMM_FMULCMPLX_COL2(z0 ,z1 ,z2 ,z3 ,p0,z8, z9, z10,z11,z28,z29)
|
||||
GEMM_FMULCMPLX_COL2(z4 ,z5 ,z6 ,z7 ,p0,z12,z13,z14,z15,z28,z29)
|
||||
GEMM_FMULCMPLX_COL2(z8 ,z9 ,z10,z11,p0,z16,z17,z18,z19,z28,z29)
|
||||
" b WRITE_MEM_EXEC \n\t"
|
||||
" \n\t"
|
||||
" UNIT_ALPHA: \n\t"
|
||||
MOV_COL2(z20,z21,z22,z23,z0 ,z1 ,z2 ,z3 )
|
||||
MOV_COL2(z24,z25,z26,z27,z4 ,z5 ,z6 ,z7 )
|
||||
MOV_COL2(z0 ,z1 ,z2 ,z3 ,z8, z9, z10,z11)
|
||||
MOV_COL2(z4 ,z5 ,z6 ,z7 ,z12,z13,z14,z15)
|
||||
MOV_COL2(z8 ,z9 ,z10,z11,z16,z17,z18,z19)
|
||||
" \n\t"
|
||||
" WRITE_MEM_EXEC: \n\t"
|
||||
" mov x9, %2 \n\t" // C address for loading.
|
||||
" \n\t" // C address for storing is %2 itself.
|
||||
" cmp %3, #1 \n\t"
|
||||
|
||||
Reference in New Issue
Block a user