# Patch summary (preamble text ahead of the first "diff --git" header).
#
# config/mic/make_defs.mk
#   * CMISCFLAGS: removes "-openmp"; "-mmic -fasm-blocks -std=c99" are kept.
#   * LDFLAGS: changes from "-lm" to "-mmic -lm".
#
# kernels/mic/3/bli_gemm_opt_30x8.c (function bli_dgemm_opt_30x8)
#   * a_next / b_next are now obtained with bli_auxinfo_next_a( data ) and
#     bli_auxinfo_next_b( data ); the old code passed "aux", while the
#     parameter shown in the hunk header context is "auxinfo_t* data".
#   * After the POSTACCUM label, the commented-out "// jmp END" is removed.
#   * "mov r9, c" and "mov r12, alpha" are moved so they execute before the
#     "mov r14, cs_c / dec r14 / jne SCATTEREDUPDATE" test (which branches
#     when cs_c - 1 is non-zero) instead of after it. The scattered-update
#     code visible in the last hunk passes r9 to UPDATE_C_ROW_SCATTERED.
#     NOTE(review): presumably r9/r12 were not loaded on the scattered path
#     before this change -- confirm against the full file.
#   * Adds a comment describing the cs_c test; remaining -/+ pairs in this
#     file are blank-line / whitespace-only changes.
#
# kernels/mic/3/bli_gemm_opt_30x8.h
#   * The prototype macro drops its trailing "dim_t thread_id" parameter, so
#     "auxinfo_t* data" becomes the last parameter.
#
diff --git a/config/mic/make_defs.mk b/config/mic/make_defs.mk index 23d5d9096..c894bc638 100644 --- a/config/mic/make_defs.mk +++ b/config/mic/make_defs.mk @@ -78,7 +78,7 @@ GIT_LOG := $(GIT) log --decorate # --- Determine the C compiler and related flags --- CC := icc CPPROCFLAGS := -CMISCFLAGS := -mmic -fasm-blocks -std=c99 -openmp +CMISCFLAGS := -mmic -fasm-blocks -std=c99 CDBGFLAGS := CWARNFLAGS := -Wall COPTFLAGS := -O3 @@ -98,7 +98,7 @@ ARFLAGS := cru # --- Determine the linker and related flags --- LINKER := $(CC) -LDFLAGS := -lm +LDFLAGS := -mmic -lm diff --git a/kernels/mic/3/bli_gemm_opt_30x8.c b/kernels/mic/3/bli_gemm_opt_30x8.c index 71e77f9b3..c4d0ff11a 100644 --- a/kernels/mic/3/bli_gemm_opt_30x8.c +++ b/kernels/mic/3/bli_gemm_opt_30x8.c @@ -275,8 +275,8 @@ void bli_dgemm_opt_30x8( auxinfo_t* data ) { - double * a_next = bli_auxinfo_next_a( aux ); - double * b_next = bli_auxinfo_next_b( aux ); + double * a_next = bli_auxinfo_next_a( data ); + double * b_next = bli_auxinfo_next_b( data ); int * offsetPtr = &offsets[0]; @@ -364,7 +364,7 @@ void bli_dgemm_opt_30x8( LOOPMAIN: ONE_ITER_MAIN_LOOP(rcx, rsi) jne LOOPMAIN - + //Penultimate 22 iterations. //Break these off from the main loop to avoid prefetching extra shit. mov r14, a_next @@ -398,20 +398,24 @@ void bli_dgemm_opt_30x8( POSTACCUM: - // jmp END + #ifdef MONITORS rdtsc mov mid2l, eax mov mid2h, edx #endif + + mov r9, c //load address of c for update + mov r12, alpha //load address of alpha + + // Check if C is row stride. 
If not, jump to the slow scattered update mov r14, cs_c dec r14 jne SCATTEREDUPDATE mov r14, beta vbroadcastsd zmm31, 0[r14] - mov r9, c //load address of c for update - mov r12, alpha //load address of alpha + vmulpd zmm0, zmm0, 0[r12]{1to8} vmulpd zmm1, zmm1, 0[r12]{1to8} @@ -526,7 +530,7 @@ void bli_dgemm_opt_30x8( vpbroadcastd zmm30, cs_c mov r13, beta vpmulld zmm30, zmm31, zmm30 - + mov ebx, 255 UPDATE_C_ROW_SCATTERED(zmm0, 0, r9) UPDATE_C_ROW_SCATTERED(zmm1, 1, r9) diff --git a/kernels/mic/3/bli_gemm_opt_30x8.h b/kernels/mic/3/bli_gemm_opt_30x8.h index 3f27b767f..76e0e2d8e 100644 --- a/kernels/mic/3/bli_gemm_opt_30x8.h +++ b/kernels/mic/3/bli_gemm_opt_30x8.h @@ -45,8 +45,7 @@ void PASTEMAC(ch,varname)( \ ctype* b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ - auxinfo_t* data, \ - dim_t thread_id \ + auxinfo_t* data \ ); INSERT_GENTPROT_BASIC( gemm_opt_30x8 )