Arm SVE ZGEMM 2Vx10 Unindexed: Special-Case Alpha=1.0

This commit is contained in:
RuQing Xu
2021-09-19 23:30:42 +09:00
parent f7c6c2b119
commit 9e1e781cb5

View File

@@ -42,6 +42,12 @@
// 2vx10 microkernels.
#include "armsve_asm_2vx10cmplx.h"
// MOV_COL2: expands to inline-assembly text for four SVE register-to-register
// moves (on the .d element view), in this order:
//   ZD0Re <- Z0Re, ZD0Im <- Z0Im, ZD1Re <- Z1Re, ZD1Im <- Z1Im
// Each argument is a bare register name (e.g. z20); it is stringified with '#'
// and spliced into the instruction string.
//
// It takes its destination and source registers in the same order as
// GEMM_FMULCMPLX_COL2 (minus the predicate and the two alpha registers), and
// the unit-alpha path of the kernel below uses it in place of that macro so
// that results end up in the same registers without performing the multiply.
//
// The four moves are emitted sequentially, so a destination must not be a
// source that a later move in the same (or a following) expansion still needs.
//
// NOTE(review): the Re/Im suffixes presumably denote the real and imaginary
// parts of two complex vectors -- confirm against armsve_asm_2vx10cmplx.h.
#define MOV_COL2(ZD0Re,ZD0Im,ZD1Re,ZD1Im,Z0Re,Z0Im,Z1Re,Z1Im) \
" mov "#ZD0Re".d, "#Z0Re".d \n\t" \
" mov "#ZD0Im".d, "#Z0Im".d \n\t" \
" mov "#ZD1Re".d, "#Z1Re".d \n\t" \
" mov "#ZD1Im".d, "#Z1Im".d \n\t"
void bli_zgemm_armsve_asm_2vx10_unindexed
(
dim_t k0,
@@ -205,14 +211,26 @@ GEMM_2VX10CMPLX_MKER_LOOP_PLAIN_C_1_RESIDUAL(z0,z2,z4,z6,z8,z10,z12,z14,z16,z18,
" prfm PLDL1STRM, [%10, 256*1] \n\t"
" \n\t"
" WRITE_MEM: \n\t"
" fmov d27, #1.0 \n\t"
" fcmp d29, #0.0 \n\t" // Whether Imag(alpha) == 0.
" fccmp d28, d27, 0, eq \n\t" // Whether Real(alpha) == 1.
" b.eq UNIT_ALPHA \n\t"
" \n\t"
GEMM_FMULCMPLX_COL2(z20,z21,z22,z23,p0,z0 ,z1 ,z2 ,z3 ,z28,z29)
GEMM_FMULCMPLX_COL2(z24,z25,z26,z27,p0,z4 ,z5 ,z6 ,z7 ,z28,z29)
GEMM_FMULCMPLX_COL2(z0 ,z1 ,z2 ,z3 ,p0,z8, z9, z10,z11,z28,z29)
GEMM_FMULCMPLX_COL2(z4 ,z5 ,z6 ,z7 ,p0,z12,z13,z14,z15,z28,z29)
GEMM_FMULCMPLX_COL2(z8 ,z9 ,z10,z11,p0,z16,z17,z18,z19,z28,z29)
" b WRITE_MEM_EXEC \n\t"
" \n\t"
" UNIT_ALPHA: \n\t"
MOV_COL2(z20,z21,z22,z23,z0 ,z1 ,z2 ,z3 )
MOV_COL2(z24,z25,z26,z27,z4 ,z5 ,z6 ,z7 )
MOV_COL2(z0 ,z1 ,z2 ,z3 ,z8, z9, z10,z11)
MOV_COL2(z4 ,z5 ,z6 ,z7 ,z12,z13,z14,z15)
MOV_COL2(z8 ,z9 ,z10,z11,z16,z17,z18,z19)
" \n\t"
" WRITE_MEM_EXEC: \n\t"
" mov x9, %2 \n\t" // C address for loading.
" \n\t" // C address for storing is %2 itself.
" cmp %3, #1 \n\t"