diff --git a/config/armv7a/bli_config.h b/config/armv7a/bli_config.h index e1c28991c..bf01caefe 100644 --- a/config/armv7a/bli_config.h +++ b/config/armv7a/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/bgq/bli_config.h b/config/bgq/bli_config.h index 1cc951c52..39a627a6a 100644 --- a/config/bgq/bli_config.h +++ b/config/bgq/bli_config.h @@ -121,10 +121,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 32 - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/cortex-a15/bli_config.h b/config/cortex-a15/bli_config.h index b779d59df..f6be2e573 100644 --- a/config/cortex-a15/bli_config.h +++ b/config/cortex-a15/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/cortex-a9/bli_config.h b/config/cortex-a9/bli_config.h index b779d59df..f6be2e573 100644 --- a/config/cortex-a9/bli_config.h +++ b/config/cortex-a9/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/dunnington/bli_config.h b/config/dunnington/bli_config.h index badab1d5d..b397f3c94 100644 --- a/config/dunnington/bli_config.h +++ b/config/dunnington/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16 - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/loongson3a/bli_config.h b/config/loongson3a/bli_config.h index 849557543..23e32e10a 100644 --- a/config/loongson3a/bli_config.h +++ b/config/loongson3a/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16 - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/mic/bli_config.h b/config/mic/bli_config.h index 3c8250292..637e71f74 100644 --- a/config/mic/bli_config.h +++ b/config/mic/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_CACHE_LINE_SIZE - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/piledriver/bli_config.h b/config/piledriver/bli_config.h index 1115eb930..57ace8ba1 100644 --- a/config/piledriver/bli_config.h +++ b/config/piledriver/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_CACHE_LINE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_CACHE_LINE_SIZE - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/power7/bli_config.h b/config/power7/bli_config.h index 388828027..3e32f5367 100644 --- a/config/power7/bli_config.h +++ b/config/power7/bli_config.h @@ -116,10 +116,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16 - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/reference/bli_config.h b/config/reference/bli_config.h index b779d59df..f6be2e573 100644 --- a/config/reference/bli_config.h +++ b/config/reference/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/sandybridge/bli_config.h b/config/sandybridge/bli_config.h index e721c60c1..5816b5728 100644 --- a/config/sandybridge/bli_config.h +++ b/config/sandybridge/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/template/bli_config.h b/config/template/bli_config.h index 01010091d..5c2734f5f 100644 --- a/config/template/bli_config.h +++ b/config/template/bli_config.h @@ -118,10 +118,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE - // -- MIXED DATATYPE SUPPORT --------------------------------------------------- diff --git a/config/template/kernels/3/bli_gemm_opt_mxn.c b/config/template/kernels/3/bli_gemm_opt_mxn.c index d541fe06d..b32227fff 100644 --- a/config/template/kernels/3/bli_gemm_opt_mxn.c +++ b/config/template/kernels/3/bli_gemm_opt_mxn.c @@ -162,8 +162,7 @@ void bli_dgemm_opt_mxn( that exist (at the edges) is handled automatically within the macro-kernel. - Alignment of a1 and b1. The addresses a1 and b1 are aligned according - to the alignment value BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the - bli_config.h header file of the BLIS configuration. + to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively. - Unrolling loops. As a general rule of thumb, the loop over k is sometimes moderately unrolled; for example, in our experience, an unrolling factor of u = 4 is fairly common. If unrolling is applied diff --git a/config/template/kernels/3/bli_gemmtrsm_l_opt_mxn.c b/config/template/kernels/3/bli_gemmtrsm_l_opt_mxn.c index dcdfb8475..550a42f79 100644 --- a/config/template/kernels/3/bli_gemmtrsm_l_opt_mxn.c +++ b/config/template/kernels/3/bli_gemmtrsm_l_opt_mxn.c @@ -166,16 +166,8 @@ void bli_dgemmtrsm_l_opt_mxn( - Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation Notes for gemm. - Edge cases in MR, NR dimensions. See Implementation Notes for gemm. - - Alignment of a1 and b1. Unlike with gemm, the addresses a10/a12 and a11 - are not guaranteed to be aligned according to the alignment value - BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the bli_config.h header - file. This is because these micro-panels may vary in size due to the - triangular nature of matrix A. Instead, these addresses are aligned - to PACKMR x sizeof(type), where type is the datatype in question. To - support a somewhat obscure, higher-level optimization, we similarly - do not guarantee that b01/b21 and b11 are aligned to - BLIS_CONTIG_STRIDE_ALIGN_SIZE; instead, they are only aligned to - PACKNR x sizeof(type). + - Alignment of a1 and b1. The addresses a1 and b1 are aligned according + to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively. - Unrolling loops. Most optimized implementations should unroll all three loops within the trsm subproblem of gemmtrsm. See Implementation Notes for gemm for remarks on unrolling the gemm subproblem. diff --git a/config/template/kernels/3/bli_gemmtrsm_u_opt_mxn.c b/config/template/kernels/3/bli_gemmtrsm_u_opt_mxn.c index fe9773186..2ad6a81ad 100644 --- a/config/template/kernels/3/bli_gemmtrsm_u_opt_mxn.c +++ b/config/template/kernels/3/bli_gemmtrsm_u_opt_mxn.c @@ -164,16 +164,8 @@ void bli_dgemmtrsm_u_opt_mxn( - Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation Notes for gemm. - Edge cases in MR, NR dimensions. See Implementation Notes for gemm. - - Alignment of a1 and b1. Unlike with gemm, the addresses a10/a12 and a11 - are not guaranteed to be aligned according to the alignment value - BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the bli_config.h header - file. This is because these micro-panels may vary in size due to the - triangular nature of matrix A. Instead, these addresses are aligned - to PACKMR x sizeof(type), where type is the datatype in question. To - support a somewhat obscure, higher-level optimization, we similarly - do not guarantee that b01/b21 and b11 are aligned to - BLIS_CONTIG_STRIDE_ALIGN_SIZE; instead, they are only aligned to - PACKNR x sizeof(type). + - Alignment of a1 and b1. The addresses a1 and b1 are aligned according + to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively. - Unrolling loops. Most optimized implementations should unroll all three loops within the trsm subproblem of gemmtrsm. See Implementation Notes for gemm for remarks on unrolling the gemm subproblem. diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 7601896e0..eac2a7a8e 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -336,12 +336,6 @@ void bli_packm_init_pack( bool_t densify, if ( pack_schema == BLIS_PACKED_ROW_PANELS_3M ) ps_p = ( ps_p * 3 ) / 2; - // Align the panel dimension according to the contiguous memory - // stride alignment size so that the second, third, etc panels begin - // at aligned addresses. - //ps_p = bli_align_dim_to_size( ps_p, elem_size_p, - // BLIS_CONTIG_STRIDE_ALIGN_SIZE ); - // Store the strides and panel dimension in p. bli_obj_set_incs( rs_p, cs_p, *p ); bli_obj_set_panel_dim( m_panel, *p ); @@ -383,12 +377,6 @@ void bli_packm_init_pack( bool_t densify, if ( pack_schema == BLIS_PACKED_COL_PANELS_3M ) ps_p = ( ps_p * 3 ) / 2; - // Align the panel dimension according to the contiguous memory - // stride alignment size so that the second, third, etc panels begin - // at aligned addresses. - //ps_p = bli_align_dim_to_size( ps_p, elem_size_p, - // BLIS_CONTIG_STRIDE_ALIGN_SIZE ); - // Store the strides and panel dimension in p. bli_obj_set_incs( rs_p, cs_p, *p ); bli_obj_set_panel_dim( n_panel, *p ); diff --git a/frame/include/bli_mem_pool_macro_defs.h b/frame/include/bli_mem_pool_macro_defs.h index 674569f27..b3fe7a3c7 100644 --- a/frame/include/bli_mem_pool_macro_defs.h +++ b/frame/include/bli_mem_pool_macro_defs.h @@ -210,19 +210,12 @@ // // Compute memory pool block sizes for single real. // + #define BLIS_MK_BLOCK_SIZE_S ( BLIS_POOL_MC_S * \ - ( BLIS_POOL_KC_S + \ - ( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \ - BLIS_SIZEOF_S \ - ) \ - ) * \ + BLIS_POOL_KC_S * \ BLIS_SIZEOF_S \ ) -#define BLIS_KN_BLOCK_SIZE_S ( ( BLIS_POOL_KC_S + \ - ( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \ - BLIS_SIZEOF_S \ - ) \ - ) * \ +#define BLIS_KN_BLOCK_SIZE_S ( BLIS_POOL_KC_S * \ BLIS_POOL_NC_S * \ BLIS_SIZEOF_S \ ) @@ -234,19 +227,12 @@ // // Compute memory pool block sizes for double real. // + #define BLIS_MK_BLOCK_SIZE_D ( BLIS_POOL_MC_D * \ - ( BLIS_POOL_KC_D + \ - ( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \ - BLIS_SIZEOF_D \ - ) \ - ) * \ + BLIS_POOL_KC_D * \ BLIS_SIZEOF_D \ ) -#define BLIS_KN_BLOCK_SIZE_D ( ( BLIS_POOL_KC_D + \ - ( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \ - BLIS_SIZEOF_D \ - ) \ - ) * \ +#define BLIS_KN_BLOCK_SIZE_D ( BLIS_POOL_KC_D * \ BLIS_POOL_NC_D * \ BLIS_SIZEOF_D \ ) @@ -258,19 +244,12 @@ // // Compute memory pool block sizes for single complex. // + #define BLIS_MK_BLOCK_SIZE_C ( BLIS_POOL_MC_C * \ - ( BLIS_POOL_KC_C + \ - ( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \ - BLIS_SIZEOF_C \ - ) \ - ) * \ + BLIS_POOL_KC_C * \ BLIS_SIZEOF_C \ ) -#define BLIS_KN_BLOCK_SIZE_C ( ( BLIS_POOL_KC_C + \ - ( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \ - BLIS_SIZEOF_C \ - ) \ - ) * \ +#define BLIS_KN_BLOCK_SIZE_C ( BLIS_POOL_KC_C * \ BLIS_POOL_NC_C * \ BLIS_SIZEOF_C \ ) @@ -282,19 +261,12 @@ // // Compute memory pool block sizes for double complex. // + #define BLIS_MK_BLOCK_SIZE_Z ( BLIS_POOL_MC_Z * \ - ( BLIS_POOL_KC_Z + \ - ( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \ - BLIS_SIZEOF_Z \ - ) \ - ) * \ + BLIS_POOL_KC_Z * \ BLIS_SIZEOF_Z \ ) -#define BLIS_KN_BLOCK_SIZE_Z ( ( BLIS_POOL_KC_Z + \ - ( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \ - BLIS_SIZEOF_Z \ - ) \ - ) * \ +#define BLIS_KN_BLOCK_SIZE_Z ( BLIS_POOL_KC_Z * \ BLIS_POOL_NC_Z * \ BLIS_SIZEOF_Z \ ) @@ -307,9 +279,6 @@ // Compute memory pool block sizes for single complex (4m). // -// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that -// of alignment is not supported by 4m. - #define BLIS_MK_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_MC_C * \ BLIS_POOL_4M_KC_C * \ BLIS_SIZEOF_C \ @@ -327,9 +296,6 @@ // Compute memory pool block sizes for double complex (4m). // -// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that -// of alignment is not supported by 4m. - #define BLIS_MK_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_MC_Z * \ BLIS_POOL_4M_KC_Z * \ BLIS_SIZEOF_Z \ @@ -347,8 +313,7 @@ // Compute memory pool block sizes for single complex (3m). // -// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that -// of alignment is not supported by 3m. +// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m. #define BLIS_MK_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_MC_C * \ BLIS_POOL_3M_KC_C * \ @@ -373,8 +338,7 @@ // Compute memory pool block sizes for double complex (3m). // -// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that -// of alignment is not supported by 3m. +// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m. #define BLIS_MK_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_MC_Z * \ BLIS_POOL_3M_KC_Z * \ diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index b383410b1..4d7575682 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -568,7 +568,6 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params ) libblis_test_fprintf_c( os, " # of kc x nc blocks %u\n", BLIS_NUM_KC_X_NC_BLOCKS ); libblis_test_fprintf_c( os, " # of mc x nc blocks %u\n", BLIS_NUM_MC_X_NC_BLOCKS ); libblis_test_fprintf_c( os, " block address alignment %u\n", BLIS_CONTIG_ADDR_ALIGN_SIZE ); - libblis_test_fprintf_c( os, " panel stride alignment %u\n", BLIS_CONTIG_STRIDE_ALIGN_SIZE ); libblis_test_fprintf_c( os, " max preload byte offset %u\n", BLIS_MAX_PRELOAD_BYTE_OFFSET ); libblis_test_fprintf_c( os, " actual pool sizes (bytes) \n" ); libblis_test_fprintf_c( os, " for mc x kc blocks of A %u\n", BLIS_MK_POOL_SIZE ); diff --git a/windows/build/bli_config.h b/windows/build/bli_config.h index 855e04865..4191767be 100644 --- a/windows/build/bli_config.h +++ b/windows/build/bli_config.h @@ -100,10 +100,6 @@ // from the contiguous memory allocator. #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// Alignment size used when sizing strides (eg: of packed micro-panels) -// within a block of contiguous memory. -#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16 - // -- MIXED DATATYPE SUPPORT ---------------------------------------------------