Deprecated panel stride alignment in bli_config.h.

Details:
- Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE from bli_config.h of all
  configurations. It had already been unused in packm_init() since the
  recent 4m/3m commit. This setting was rarely, if ever, useful, and its
  existence only posed a potential risk for 4m/3m-based implementations.
- Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE usage from mem_pool_macro_defs.h.
- Updated comments regarding CONTIG_STRIDE_ALIGN_SIZE in template
  micro-kernels.
This commit is contained in:
Field G. Van Zee
2014-02-26 12:46:45 -06:00
parent f18aee83a5
commit c2b2ab6270
19 changed files with 19 additions and 137 deletions

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -121,10 +121,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 32
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_CACHE_LINE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -116,10 +116,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -162,8 +162,7 @@ void bli_dgemm_opt_mxn(
that exist (at the edges) is handled automatically within the that exist (at the edges) is handled automatically within the
macro-kernel. macro-kernel.
- Alignment of a1 and b1. The addresses a1 and b1 are aligned according - Alignment of a1 and b1. The addresses a1 and b1 are aligned according
to the alignment value BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
bli_config.h header file of the BLIS configuration.
- Unrolling loops. As a general rule of thumb, the loop over k is - Unrolling loops. As a general rule of thumb, the loop over k is
sometimes moderately unrolled; for example, in our experience, an sometimes moderately unrolled; for example, in our experience, an
unrolling factor of u = 4 is fairly common. If unrolling is applied unrolling factor of u = 4 is fairly common. If unrolling is applied

View File

@@ -166,16 +166,8 @@ void bli_dgemmtrsm_l_opt_mxn(
- Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation - Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation
Notes for gemm. Notes for gemm.
- Edge cases in MR, NR dimensions. See Implementation Notes for gemm. - Edge cases in MR, NR dimensions. See Implementation Notes for gemm.
- Alignment of a1 and b1. Unlike with gemm, the addresses a10/a12 and a11 - Alignment of a1 and b1. The addresses a1 and b1 are aligned according
are not guaranteed to be aligned according to the alignment value to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the bli_config.h header
file. This is because these micro-panels may vary in size due to the
triangular nature of matrix A. Instead, these addresses are aligned
to PACKMR x sizeof(type), where type is the datatype in question. To
support a somewhat obscure, higher-level optimization, we similarly
do not guarantee that b01/b21 and b11 are aligned to
BLIS_CONTIG_STRIDE_ALIGN_SIZE; instead, they are only aligned to
PACKNR x sizeof(type).
- Unrolling loops. Most optimized implementations should unroll all - Unrolling loops. Most optimized implementations should unroll all
three loops within the trsm subproblem of gemmtrsm. See Implementation three loops within the trsm subproblem of gemmtrsm. See Implementation
Notes for gemm for remarks on unrolling the gemm subproblem. Notes for gemm for remarks on unrolling the gemm subproblem.

View File

@@ -164,16 +164,8 @@ void bli_dgemmtrsm_u_opt_mxn(
- Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation - Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation
Notes for gemm. Notes for gemm.
- Edge cases in MR, NR dimensions. See Implementation Notes for gemm. - Edge cases in MR, NR dimensions. See Implementation Notes for gemm.
- Alignment of a1 and b1. Unlike with gemm, the addresses a10/a12 and a11 - Alignment of a1 and b1. The addresses a1 and b1 are aligned according
are not guaranteed to be aligned according to the alignment value to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the bli_config.h header
file. This is because these micro-panels may vary in size due to the
triangular nature of matrix A. Instead, these addresses are aligned
to PACKMR x sizeof(type), where type is the datatype in question. To
support a somewhat obscure, higher-level optimization, we similarly
do not guarantee that b01/b21 and b11 are aligned to
BLIS_CONTIG_STRIDE_ALIGN_SIZE; instead, they are only aligned to
PACKNR x sizeof(type).
- Unrolling loops. Most optimized implementations should unroll all - Unrolling loops. Most optimized implementations should unroll all
three loops within the trsm subproblem of gemmtrsm. See Implementation three loops within the trsm subproblem of gemmtrsm. See Implementation
Notes for gemm for remarks on unrolling the gemm subproblem. Notes for gemm for remarks on unrolling the gemm subproblem.

View File

@@ -336,12 +336,6 @@ void bli_packm_init_pack( bool_t densify,
if ( pack_schema == BLIS_PACKED_ROW_PANELS_3M ) if ( pack_schema == BLIS_PACKED_ROW_PANELS_3M )
ps_p = ( ps_p * 3 ) / 2; ps_p = ( ps_p * 3 ) / 2;
// Align the panel dimension according to the contiguous memory
// stride alignment size so that the second, third, etc panels begin
// at aligned addresses.
//ps_p = bli_align_dim_to_size( ps_p, elem_size_p,
// BLIS_CONTIG_STRIDE_ALIGN_SIZE );
// Store the strides and panel dimension in p. // Store the strides and panel dimension in p.
bli_obj_set_incs( rs_p, cs_p, *p ); bli_obj_set_incs( rs_p, cs_p, *p );
bli_obj_set_panel_dim( m_panel, *p ); bli_obj_set_panel_dim( m_panel, *p );
@@ -383,12 +377,6 @@ void bli_packm_init_pack( bool_t densify,
if ( pack_schema == BLIS_PACKED_COL_PANELS_3M ) if ( pack_schema == BLIS_PACKED_COL_PANELS_3M )
ps_p = ( ps_p * 3 ) / 2; ps_p = ( ps_p * 3 ) / 2;
// Align the panel dimension according to the contiguous memory
// stride alignment size so that the second, third, etc panels begin
// at aligned addresses.
//ps_p = bli_align_dim_to_size( ps_p, elem_size_p,
// BLIS_CONTIG_STRIDE_ALIGN_SIZE );
// Store the strides and panel dimension in p. // Store the strides and panel dimension in p.
bli_obj_set_incs( rs_p, cs_p, *p ); bli_obj_set_incs( rs_p, cs_p, *p );
bli_obj_set_panel_dim( n_panel, *p ); bli_obj_set_panel_dim( n_panel, *p );

View File

@@ -210,19 +210,12 @@
// //
// Compute memory pool block sizes for single real. // Compute memory pool block sizes for single real.
// //
#define BLIS_MK_BLOCK_SIZE_S ( BLIS_POOL_MC_S * \ #define BLIS_MK_BLOCK_SIZE_S ( BLIS_POOL_MC_S * \
( BLIS_POOL_KC_S + \ BLIS_POOL_KC_S * \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_S \
) \
) * \
BLIS_SIZEOF_S \ BLIS_SIZEOF_S \
) )
#define BLIS_KN_BLOCK_SIZE_S ( ( BLIS_POOL_KC_S + \ #define BLIS_KN_BLOCK_SIZE_S ( BLIS_POOL_KC_S * \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_S \
) \
) * \
BLIS_POOL_NC_S * \ BLIS_POOL_NC_S * \
BLIS_SIZEOF_S \ BLIS_SIZEOF_S \
) )
@@ -234,19 +227,12 @@
// //
// Compute memory pool block sizes for double real. // Compute memory pool block sizes for double real.
// //
#define BLIS_MK_BLOCK_SIZE_D ( BLIS_POOL_MC_D * \ #define BLIS_MK_BLOCK_SIZE_D ( BLIS_POOL_MC_D * \
( BLIS_POOL_KC_D + \ BLIS_POOL_KC_D * \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_D \
) \
) * \
BLIS_SIZEOF_D \ BLIS_SIZEOF_D \
) )
#define BLIS_KN_BLOCK_SIZE_D ( ( BLIS_POOL_KC_D + \ #define BLIS_KN_BLOCK_SIZE_D ( BLIS_POOL_KC_D * \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_D \
) \
) * \
BLIS_POOL_NC_D * \ BLIS_POOL_NC_D * \
BLIS_SIZEOF_D \ BLIS_SIZEOF_D \
) )
@@ -258,19 +244,12 @@
// //
// Compute memory pool block sizes for single complex. // Compute memory pool block sizes for single complex.
// //
#define BLIS_MK_BLOCK_SIZE_C ( BLIS_POOL_MC_C * \ #define BLIS_MK_BLOCK_SIZE_C ( BLIS_POOL_MC_C * \
( BLIS_POOL_KC_C + \ BLIS_POOL_KC_C * \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_C \
) \
) * \
BLIS_SIZEOF_C \ BLIS_SIZEOF_C \
) )
#define BLIS_KN_BLOCK_SIZE_C ( ( BLIS_POOL_KC_C + \ #define BLIS_KN_BLOCK_SIZE_C ( BLIS_POOL_KC_C * \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_C \
) \
) * \
BLIS_POOL_NC_C * \ BLIS_POOL_NC_C * \
BLIS_SIZEOF_C \ BLIS_SIZEOF_C \
) )
@@ -282,19 +261,12 @@
// //
// Compute memory pool block sizes for double complex. // Compute memory pool block sizes for double complex.
// //
#define BLIS_MK_BLOCK_SIZE_Z ( BLIS_POOL_MC_Z * \ #define BLIS_MK_BLOCK_SIZE_Z ( BLIS_POOL_MC_Z * \
( BLIS_POOL_KC_Z + \ BLIS_POOL_KC_Z * \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_Z \
) \
) * \
BLIS_SIZEOF_Z \ BLIS_SIZEOF_Z \
) )
#define BLIS_KN_BLOCK_SIZE_Z ( ( BLIS_POOL_KC_Z + \ #define BLIS_KN_BLOCK_SIZE_Z ( BLIS_POOL_KC_Z * \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_Z \
) \
) * \
BLIS_POOL_NC_Z * \ BLIS_POOL_NC_Z * \
BLIS_SIZEOF_Z \ BLIS_SIZEOF_Z \
) )
@@ -307,9 +279,6 @@
// Compute memory pool block sizes for single complex (4m). // Compute memory pool block sizes for single complex (4m).
// //
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
// kind of alignment is not supported by 4m.
#define BLIS_MK_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_MC_C * \ #define BLIS_MK_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_MC_C * \
BLIS_POOL_4M_KC_C * \ BLIS_POOL_4M_KC_C * \
BLIS_SIZEOF_C \ BLIS_SIZEOF_C \
@@ -327,9 +296,6 @@
// Compute memory pool block sizes for double complex (4m). // Compute memory pool block sizes for double complex (4m).
// //
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
// kind of alignment is not supported by 4m.
#define BLIS_MK_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_MC_Z * \ #define BLIS_MK_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_MC_Z * \
BLIS_POOL_4M_KC_Z * \ BLIS_POOL_4M_KC_Z * \
BLIS_SIZEOF_Z \ BLIS_SIZEOF_Z \
@@ -347,8 +313,7 @@
// Compute memory pool block sizes for single complex (3m). // Compute memory pool block sizes for single complex (3m).
// //
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that // NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
// kind of alignment is not supported by 3m.
#define BLIS_MK_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_MC_C * \ #define BLIS_MK_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_MC_C * \
BLIS_POOL_3M_KC_C * \ BLIS_POOL_3M_KC_C * \
@@ -373,8 +338,7 @@
// Compute memory pool block sizes for double complex (3m). // Compute memory pool block sizes for double complex (3m).
// //
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that // NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
// kind of alignment is not supported by 3m.
#define BLIS_MK_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_MC_Z * \ #define BLIS_MK_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_MC_Z * \
BLIS_POOL_3M_KC_Z * \ BLIS_POOL_3M_KC_Z * \

View File

@@ -568,7 +568,6 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params )
libblis_test_fprintf_c( os, " # of kc x nc blocks %u\n", BLIS_NUM_KC_X_NC_BLOCKS ); libblis_test_fprintf_c( os, " # of kc x nc blocks %u\n", BLIS_NUM_KC_X_NC_BLOCKS );
libblis_test_fprintf_c( os, " # of mc x nc blocks %u\n", BLIS_NUM_MC_X_NC_BLOCKS ); libblis_test_fprintf_c( os, " # of mc x nc blocks %u\n", BLIS_NUM_MC_X_NC_BLOCKS );
libblis_test_fprintf_c( os, " block address alignment %u\n", BLIS_CONTIG_ADDR_ALIGN_SIZE ); libblis_test_fprintf_c( os, " block address alignment %u\n", BLIS_CONTIG_ADDR_ALIGN_SIZE );
libblis_test_fprintf_c( os, " panel stride alignment %u\n", BLIS_CONTIG_STRIDE_ALIGN_SIZE );
libblis_test_fprintf_c( os, " max preload byte offset %u\n", BLIS_MAX_PRELOAD_BYTE_OFFSET ); libblis_test_fprintf_c( os, " max preload byte offset %u\n", BLIS_MAX_PRELOAD_BYTE_OFFSET );
libblis_test_fprintf_c( os, " actual pool sizes (bytes) \n" ); libblis_test_fprintf_c( os, " actual pool sizes (bytes) \n" );
libblis_test_fprintf_c( os, " for mc x kc blocks of A %u\n", BLIS_MK_POOL_SIZE ); libblis_test_fprintf_c( os, " for mc x kc blocks of A %u\n", BLIS_MK_POOL_SIZE );

View File

@@ -100,10 +100,6 @@
// from the contiguous memory allocator. // from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE #define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
// -- MIXED DATATYPE SUPPORT --------------------------------------------------- // -- MIXED DATATYPE SUPPORT ---------------------------------------------------