Cast dim_t and inc_t parameters to 64-bit in KNL microkernels.

This commit is contained in:
Devin Matthews
2017-02-19 21:10:55 -05:00
parent c362afc525
commit 7d42fc0796
4 changed files with 54 additions and 26 deletions

View File

@@ -1,6 +1,6 @@
/*
BLIS
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
@@ -105,16 +105,22 @@ extern int32_t offsets[24];
void bli_dpackm_8xk_opt
(
conj_t conja,
dim_t n,
dim_t n_,
void* restrict kappa_,
void* restrict a_, inc_t inca, inc_t lda,
void* restrict p_, inc_t ldp
void* restrict a_, inc_t inca_, inc_t lda_,
void* restrict p_, inc_t ldp_
)
{
(void)conja;
const int32_t * offsetPtr = &offsets[0];
double* a = (double*)a_;
double* p = (double*)p_;
double* kappa = (double*)kappa_;
const int64_t n = n_;
const int64_t inca = inca_;
const int64_t lda = lda_;
const int64_t ldp = ldp_;
__asm__ volatile
(
@@ -291,16 +297,22 @@ void bli_dpackm_8xk_opt
void bli_dpackm_24xk_opt
(
conj_t conja,
dim_t n,
dim_t n_,
void* restrict kappa_,
void* restrict a_, inc_t inca, inc_t lda,
void* restrict p_, inc_t ldp
void* restrict a_, inc_t inca_, inc_t lda_,
void* restrict p_, inc_t ldp_
)
{
(void)conja;
const int32_t * offsetPtr = &offsets[0];
double* a = (double*)a_;
double* p = (double*)p_;
double* kappa = (double*)kappa_;
const int64_t n = n_;
const int64_t inca = inca_;
const int64_t lda = lda_;
const int64_t ldp = ldp_;
__asm__ volatile
(

View File

@@ -1,6 +1,6 @@
/*
BLIS
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
@@ -133,16 +133,22 @@ extern int32_t offsets[32];
void bli_dpackm_30xk_opt
(
conj_t conja,
dim_t n,
dim_t n_,
void* restrict kappa_,
void* restrict a_, inc_t inca, inc_t lda,
void* restrict p_, inc_t ldp
void* restrict a_, inc_t inca_, inc_t lda_,
void* restrict p_, inc_t ldp_
)
{
(void)conja;
const int32_t * offsetPtr = &offsets[0];
double* a = (double*)a_;
double* p = (double*)p_;
double* kappa = (double*)kappa_;
const int64_t n = n_;
const int64_t inca = inca_;
const int64_t lda = lda_;
const int64_t ldp = ldp_;
__asm__ volatile
(

View File

@@ -1,6 +1,6 @@
/*
BLIS
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
@@ -181,22 +181,26 @@ extern int32_t offsets[24];
//#define MONITORS
//#define LOOPMON
void bli_dgemm_opt_24x8(
dim_t k,
dim_t k_,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
double* restrict c, inc_t rs_c_, inc_t cs_c_,
auxinfo_t* data,
cntx_t* restrict cntx
)
{
(void)data;
(void)cntx;
const double * a_next = bli_auxinfo_next_a( data );
const double * b_next = bli_auxinfo_next_b( data );
const int32_t * offsetPtr = &offsets[0];
uint64_t k64 = k;
const int64_t k = k_;
const int64_t rs_c = rs_c_;
const int64_t cs_c = cs_c_;
#ifdef MONITORS
int toph, topl, both, botl, midl, midh, mid2l, mid2h;
@@ -204,7 +208,7 @@ void bli_dgemm_opt_24x8(
#ifdef LOOPMON
int tlooph, tloopl, blooph, bloopl;
#endif
__asm__ volatile
(
#ifdef MONITORS
@@ -223,22 +227,22 @@ void bli_dgemm_opt_24x8(
VMOVAPS(ZMM(15), ZMM(8)) MOV(RDI, VAR(offsetPtr))
VMOVAPS(ZMM(16), ZMM(8)) VMOVAPS(ZMM(4), MEM(RDI))
#if SCATTER_PREFETCH_C
VMOVAPS(ZMM(17), ZMM(8))
VMOVAPS(ZMM(18), ZMM(8))
VMOVAPS(ZMM(17), ZMM(8))
VMOVAPS(ZMM(18), ZMM(8))
VMOVAPS(ZMM(19), ZMM(8)) VBROADCASTSS(ZMM(5), VAR(rs_c))
VMOVAPS(ZMM(20), ZMM(8))
VMOVAPS(ZMM(20), ZMM(8))
VMOVAPS(ZMM(21), ZMM(8)) VPMULLD(ZMM(2), ZMM(4), ZMM(5))
VMOVAPS(ZMM(22), ZMM(8)) VMOVAPS(YMM(3), MEM(RDI,64))
VMOVAPS(ZMM(23), ZMM(8)) VPMULLD(YMM(3), YMM(3), YMM(5))
#else
VMOVAPS(ZMM(17), ZMM(8))
VMOVAPS(ZMM(17), ZMM(8))
VMOVAPS(ZMM(18), ZMM(8)) LEA(R13, MEM(R12,R12,2))
VMOVAPS(ZMM(19), ZMM(8)) LEA(R14, MEM(R12,R12,4))
VMOVAPS(ZMM(20), ZMM(8)) LEA(R15, MEM(R13,R12,4))
VMOVAPS(ZMM(21), ZMM(8))
VMOVAPS(ZMM(22), ZMM(8))
VMOVAPS(ZMM(23), ZMM(8))
#endif
#endif
VMOVAPS(ZMM(24), ZMM(8)) VPSLLD(ZMM(4), ZMM(4), IMM(3))
VMOVAPS(ZMM(25), ZMM(8)) MOV(R8, IMM(4*24*8)) //offset for 4 iterations
VMOVAPS(ZMM(26), ZMM(8)) LEA(R9, MEM(R8,R8,2)) //*3
@@ -670,7 +674,7 @@ void bli_dgemm_opt_24x8(
[both] "=m" (both)
#endif
: // input operands
[k] "m" (k64),
[k] "m" (k),
[a] "m" (a),
[b] "m" (b),
[alpha] "m" (alpha),

View File

@@ -1,6 +1,6 @@
/*
BLIS
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
@@ -166,20 +166,26 @@ int32_t offsets[32] __attribute__((aligned(0x1000))) = { 0, 1, 2, 3, 4, 5,
//#define MONITORS
//#define LOOPMON
void bli_sgemm_opt_30x16_knc(
dim_t k,
dim_t k_,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
float* restrict c, inc_t rs_c_, inc_t cs_c_,
auxinfo_t* data,
cntx_t* restrict cntx
)
{
(void)data;
(void)cntx;
const float * a_next = bli_auxinfo_next_a( data );
const float * b_next = bli_auxinfo_next_b( data );
const int32_t * offsetPtr = &offsets[0];
const int64_t k = k_;
const int64_t rs_c = rs_c_;
const int64_t cs_c = cs_c_;
#ifdef MONITORS
int toph, topl, both, botl, midl, midh, mid2l, mid2h;