diff --git a/common.mk b/common.mk index 4a5c5b8d5..712482d82 100644 --- a/common.mk +++ b/common.mk @@ -1,6 +1,6 @@ # # -# BLIS +# BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # @@ -626,7 +626,7 @@ endif # Disable tautological comparision warnings in clang. ifeq ($(CC_VENDOR),clang) -CWARNFLAGS += -Wno-tautological-compare +CWARNFLAGS += -Wno-tautological-compare -Wno-pass-failed endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CWARNFLAGS,$(c)))) diff --git a/frame/base/bli_pool.c b/frame/base/bli_pool.c index 350d59b73..08876c68a 100644 --- a/frame/base/bli_pool.c +++ b/frame/base/bli_pool.c @@ -124,15 +124,15 @@ void bli_pool_finalize // Query the total number of blocks currently allocated. const siz_t num_blocks = bli_pool_num_blocks( pool ); - // Query the top_index of the pool. - const siz_t top_index = bli_pool_top_index( pool ); - // NOTE: This sanity check has been disabled because bli_pool_reinit() // is currently implemented in terms of bli_pool_finalize() followed by // bli_pool_init(). If that _reinit() takes place when some blocks are // checked out, then we would expect top_index != 0, and therefore this // check is not universally appropriate. #if 0 + // Query the top_index of the pool. + const siz_t top_index = bli_pool_top_index( pool ); + // Sanity check: The top_index should be zero. if ( top_index != 0 ) { diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_c3xk.c b/kernels/haswell/1m/bli_packm_haswell_asm_c3xk.c index 273caeb3d..843335ad5 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_c3xk.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_c3xk.c @@ -104,10 +104,10 @@ void bli_cpackm_haswell_asm_3xk // ------------------------------------------------------------------------- - if ( cdim0 == mnr && !gs && !bli_does_conj( conja ) && unitk ) + if ( cdim0 == mnr && !gs && !conja && unitk ) { begin_asm() - + mov(var(a), rax) // load address of a. mov(var(inca), r8) // load inca @@ -122,14 +122,14 @@ void bli_cpackm_haswell_asm_3xk mov(var(one), rdx) // load address of 1.0 constant vbroadcastss(mem(rdx, 0), ymm1) // load 1.0 and duplicate vxorps(ymm0, ymm0, ymm0) // set ymm0 to 0.0. - + mov(var(kappa), rcx) // load address of kappa vbroadcastss(mem(rcx, 0), ymm10) // load kappa_r and duplicate vbroadcastss(mem(rcx, 4), ymm11) // load kappa_i and duplicate - + // now branch on kappa == 1.0 - + vucomiss(xmm1, xmm10) // set ZF if kappa_r == 1.0. sete(r12b) // r12b = ( ZF == 1 ? 1 : 0 ); vucomiss(xmm0, xmm11) // set ZF if kappa_i == 0.0. @@ -143,7 +143,7 @@ void bli_cpackm_haswell_asm_3xk cmp(imm(8), r8) // set ZF if (8*inca) == 8. jz(.CCOLNONU) // jump to column storage case - + // -- kappa non-unit, row storage on A ------------------------------------- label(.CROWNONU) @@ -156,7 +156,7 @@ void bli_cpackm_haswell_asm_3xk label(.CCOLNONU) jmp(.CDONE) // jump to end. - + @@ -167,7 +167,7 @@ void bli_cpackm_haswell_asm_3xk // -- kappa unit, row storage on A ----------------------------------------- - + label(.CROWUNIT) //lea(mem(r8, r8, 2), r12) // r12 = 3*inca @@ -251,7 +251,7 @@ void bli_cpackm_haswell_asm_3xk // -- kappa unit, column storage on A -------------------------------------- label(.CCOLUNIT) - + lea(mem(r10, r10, 2), r13) // r13 = 3*lda mov(var(k_iter), rsi) // i = k_iter; @@ -315,8 +315,8 @@ void bli_cpackm_haswell_asm_3xk label(.CDONE) - - + + end_asm( : // output operands (none) @@ -370,7 +370,7 @@ void bli_cpackm_haswell_asm_3xk ( m_edge, n_edge, - p_edge, 1, ldp + p_edge, 1, ldp ); } } @@ -390,7 +390,7 @@ void bli_cpackm_haswell_asm_3xk ( m_edge, n_edge, - p_edge, 1, ldp + p_edge, 1, ldp ); } } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_c8xk.c b/kernels/haswell/1m/bli_packm_haswell_asm_c8xk.c index be6877e71..862a33b86 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_c8xk.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_c8xk.c @@ -104,10 +104,10 @@ void bli_cpackm_haswell_asm_8xk // ------------------------------------------------------------------------- - if ( cdim0 == mnr && !gs && !bli_does_conj( conja ) && unitk ) + if ( cdim0 == mnr && !gs && !conja && unitk ) { begin_asm() - + mov(var(a), rax) // load address of a. mov(var(inca), r8) // load inca @@ -122,14 +122,14 @@ void bli_cpackm_haswell_asm_8xk mov(var(one), rdx) // load address of 1.0 constant vbroadcastss(mem(rdx, 0), ymm1) // load 1.0 and duplicate vxorps(ymm0, ymm0, ymm0) // set ymm0 to 0.0. - + mov(var(kappa), rcx) // load address of kappa vbroadcastss(mem(rcx, 0), ymm10) // load kappa_r and duplicate vbroadcastss(mem(rcx, 4), ymm11) // load kappa_i and duplicate - + // now branch on kappa == 1.0 - + vucomiss(xmm1, xmm10) // set ZF if kappa_r == 1.0. sete(r12b) // r12b = ( ZF == 1 ? 1 : 0 ); vucomiss(xmm0, xmm11) // set ZF if kappa_i == 0.0. @@ -143,7 +143,7 @@ void bli_cpackm_haswell_asm_8xk cmp(imm(8), r8) // set ZF if (8*inca) == 8. jz(.CCOLNONU) // jump to column storage case - + // -- kappa non-unit, row storage on A ------------------------------------- label(.CROWNONU) @@ -156,7 +156,7 @@ void bli_cpackm_haswell_asm_8xk label(.CCOLNONU) jmp(.CDONE) // jump to end. - + @@ -167,7 +167,7 @@ void bli_cpackm_haswell_asm_8xk // -- kappa unit, row storage on A ----------------------------------------- - + label(.CROWUNIT) lea(mem(r8, r8, 2), r12) // r12 = 3*inca @@ -271,7 +271,7 @@ void bli_cpackm_haswell_asm_8xk // -- kappa unit, column storage on A -------------------------------------- label(.CCOLUNIT) - + lea(mem(r10, r10, 2), r13) // r13 = 3*lda mov(var(k_iter), rsi) // i = k_iter; @@ -335,8 +335,8 @@ void bli_cpackm_haswell_asm_8xk label(.CDONE) - - + + end_asm( : // output operands (none) @@ -390,7 +390,7 @@ void bli_cpackm_haswell_asm_8xk ( m_edge, n_edge, - p_edge, 1, ldp + p_edge, 1, ldp ); } } @@ -408,7 +408,7 @@ void bli_cpackm_haswell_asm_8xk ( m_edge, n_edge, - p_edge, 1, ldp + p_edge, 1, ldp ); } } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_z3xk.c b/kernels/haswell/1m/bli_packm_haswell_asm_z3xk.c index 26b98f4da..1a714abe2 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_z3xk.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_z3xk.c @@ -104,10 +104,10 @@ void bli_zpackm_haswell_asm_3xk // ------------------------------------------------------------------------- - if ( cdim0 == mnr && !gs && !bli_does_conj( conja ) && unitk ) + if ( cdim0 == mnr && !gs && !conja && unitk ) { begin_asm() - + mov(var(a), rax) // load address of a. mov(var(inca), r8) // load inca @@ -124,14 +124,14 @@ void bli_zpackm_haswell_asm_3xk mov(var(one), rdx) // load address of 1.0 constant vbroadcastsd(mem(rdx, 0), ymm1) // load 1.0 and duplicate vxorpd(ymm0, ymm0, ymm0) // set ymm0 to 0.0. - + mov(var(kappa), rcx) // load address of kappa vbroadcastsd(mem(rcx, 0), ymm10) // load kappa_r and duplicate vbroadcastsd(mem(rcx, 8), ymm11) // load kappa_i and duplicate - + // now branch on kappa == 1.0 - + vucomisd(xmm1, xmm10) // set ZF if kappa_r == 1.0. sete(r12b) // r12b = ( ZF == 1 ? 1 : 0 ); vucomisd(xmm0, xmm11) // set ZF if kappa_i == 0.0. @@ -145,7 +145,7 @@ void bli_zpackm_haswell_asm_3xk cmp(imm(16), r8) // set ZF if (16*inca) == 16. jz(.ZCOLNONU) // jump to column storage case - + // -- kappa non-unit, row storage on A ------------------------------------- label(.ZROWNONU) @@ -158,7 +158,7 @@ void bli_zpackm_haswell_asm_3xk label(.ZCOLNONU) jmp(.ZDONE) // jump to end. - + @@ -169,7 +169,7 @@ void bli_zpackm_haswell_asm_3xk // -- kappa unit, row storage on A ----------------------------------------- - + label(.ZROWUNIT) //lea(mem(r8, r8, 2), r12) // r12 = 3*inca @@ -257,7 +257,7 @@ void bli_zpackm_haswell_asm_3xk // -- kappa unit, column storage on A -------------------------------------- label(.ZCOLUNIT) - + lea(mem(r10, r10, 2), r13) // r13 = 3*lda mov(var(k_iter), rsi) // i = k_iter; @@ -321,8 +321,8 @@ void bli_zpackm_haswell_asm_3xk label(.ZDONE) - - + + end_asm( : // output operands (none) @@ -376,7 +376,7 @@ void bli_zpackm_haswell_asm_3xk ( m_edge, n_edge, - p_edge, 1, ldp + p_edge, 1, ldp ); } } @@ -394,7 +394,7 @@ void bli_zpackm_haswell_asm_3xk ( m_edge, n_edge, - p_edge, 1, ldp + p_edge, 1, ldp ); } } diff --git a/kernels/haswell/1m/bli_packm_haswell_asm_z4xk.c b/kernels/haswell/1m/bli_packm_haswell_asm_z4xk.c index 655231754..4e11872af 100644 --- a/kernels/haswell/1m/bli_packm_haswell_asm_z4xk.c +++ b/kernels/haswell/1m/bli_packm_haswell_asm_z4xk.c @@ -104,10 +104,10 @@ void bli_zpackm_haswell_asm_4xk // ------------------------------------------------------------------------- - if ( cdim0 == mnr && !gs && !bli_does_conj( conja ) && unitk ) + if ( cdim0 == mnr && !gs && !conja && unitk ) { begin_asm() - + mov(var(a), rax) // load address of a. mov(var(inca), r8) // load inca @@ -128,10 +128,10 @@ void bli_zpackm_haswell_asm_4xk mov(var(kappa), rcx) // load address of kappa vbroadcastsd(mem(rcx, 0), ymm10) // load kappa_r and duplicate vbroadcastsd(mem(rcx, 8), ymm11) // load kappa_i and duplicate - + // now branch on kappa == 1.0 - + vucomisd(xmm1, xmm10) // set ZF if kappa_r == 1.0. sete(r12b) // r12b = ( ZF == 1 ? 1 : 0 ); vucomisd(xmm0, xmm11) // set ZF if kappa_i == 0.0. @@ -145,7 +145,7 @@ void bli_zpackm_haswell_asm_4xk cmp(imm(16), r8) // set ZF if (16*inca) == 16. jz(.ZCOLNONU) // jump to column storage case - + // -- kappa non-unit, row storage on A ------------------------------------- label(.ZROWNONU) @@ -158,7 +158,7 @@ void bli_zpackm_haswell_asm_4xk label(.ZCOLNONU) jmp(.ZDONE) // jump to end. - + @@ -169,7 +169,7 @@ void bli_zpackm_haswell_asm_4xk // -- kappa unit, row storage on A ----------------------------------------- - + label(.ZROWUNIT) lea(mem(r8, r8, 2), r12) // r12 = 3*inca @@ -267,7 +267,7 @@ void bli_zpackm_haswell_asm_4xk // -- kappa unit, column storage on A -------------------------------------- label(.ZCOLUNIT) - + lea(mem(r10, r10, 2), r13) // r13 = 3*lda mov(var(k_iter), rsi) // i = k_iter; @@ -331,8 +331,8 @@ void bli_zpackm_haswell_asm_4xk label(.ZDONE) - - + + end_asm( : // output operands (none) @@ -386,7 +386,7 @@ void bli_zpackm_haswell_asm_4xk ( m_edge, n_edge, - p_edge, 1, ldp + p_edge, 1, ldp ); } } @@ -404,7 +404,7 @@ void bli_zpackm_haswell_asm_4xk ( m_edge, n_edge, - p_edge, 1, ldp + p_edge, 1, ldp ); } }