Deprecated panel stride alignment in bli_config.h.

Details:
- Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE from the bli_config.h of all
  configurations. It had already been unused in packm_init() since the
  recent 4m/3m commit. This setting was rarely, if ever, useful, and its
  existence only posed a potential risk for 4m/3m-based implementations.
- Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE usage from mem_pool_macro_defs.h.
- Updated comments regarding CONTIG_STRIDE_ALIGN_SIZE in template
  micro-kernels.
This commit is contained in:
Field G. Van Zee
2014-02-26 12:46:45 -06:00
parent f18aee83a5
commit c2b2ab6270
19 changed files with 19 additions and 137 deletions

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -121,10 +121,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 32
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -116,10 +116,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -118,10 +118,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------

View File

@@ -162,8 +162,7 @@ void bli_dgemm_opt_mxn(
that exist (at the edges) is handled automatically within the
macro-kernel.
- Alignment of a1 and b1. The addresses a1 and b1 are aligned according
to the alignment value BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the
bli_config.h header file of the BLIS configuration.
to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
- Unrolling loops. As a general rule of thumb, the loop over k is
sometimes moderately unrolled; for example, in our experience, an
unrolling factor of u = 4 is fairly common. If unrolling is applied

View File

@@ -166,16 +166,8 @@ void bli_dgemmtrsm_l_opt_mxn(
- Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation
Notes for gemm.
- Edge cases in MR, NR dimensions. See Implementation Notes for gemm.
- Alignment of a1 and b1. Unlike with gemm, the addresses a10/a12 and a11
are not guaranteed to be aligned according to the alignment value
BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the bli_config.h header
file. This is because these micro-panels may vary in size due to the
triangular nature of matrix A. Instead, these addresses are aligned
to PACKMR x sizeof(type), where type is the datatype in question. To
support a somewhat obscure, higher-level optimization, we similarly
do not guarantee that b01/b21 and b11 are aligned to
BLIS_CONTIG_STRIDE_ALIGN_SIZE; instead, they are only aligned to
PACKNR x sizeof(type).
- Alignment of a1 and b1. The addresses a1 and b1 are aligned according
to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
- Unrolling loops. Most optimized implementations should unroll all
three loops within the trsm subproblem of gemmtrsm. See Implementation
Notes for gemm for remarks on unrolling the gemm subproblem.

View File

@@ -164,16 +164,8 @@ void bli_dgemmtrsm_u_opt_mxn(
- Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation
Notes for gemm.
- Edge cases in MR, NR dimensions. See Implementation Notes for gemm.
- Alignment of a1 and b1. Unlike with gemm, the addresses a10/a12 and a11
are not guaranteed to be aligned according to the alignment value
BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the bli_config.h header
file. This is because these micro-panels may vary in size due to the
triangular nature of matrix A. Instead, these addresses are aligned
to PACKMR x sizeof(type), where type is the datatype in question. To
support a somewhat obscure, higher-level optimization, we similarly
do not guarantee that b01/b21 and b11 are aligned to
BLIS_CONTIG_STRIDE_ALIGN_SIZE; instead, they are only aligned to
PACKNR x sizeof(type).
- Alignment of a1 and b1. The addresses a1 and b1 are aligned according
to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
- Unrolling loops. Most optimized implementations should unroll all
three loops within the trsm subproblem of gemmtrsm. See Implementation
Notes for gemm for remarks on unrolling the gemm subproblem.

View File

@@ -336,12 +336,6 @@ void bli_packm_init_pack( bool_t densify,
if ( pack_schema == BLIS_PACKED_ROW_PANELS_3M )
ps_p = ( ps_p * 3 ) / 2;
// Align the panel dimension according to the contiguous memory
// stride alignment size so that the second, third, etc panels begin
// at aligned addresses.
//ps_p = bli_align_dim_to_size( ps_p, elem_size_p,
// BLIS_CONTIG_STRIDE_ALIGN_SIZE );
// Store the strides and panel dimension in p.
bli_obj_set_incs( rs_p, cs_p, *p );
bli_obj_set_panel_dim( m_panel, *p );
@@ -383,12 +377,6 @@ void bli_packm_init_pack( bool_t densify,
if ( pack_schema == BLIS_PACKED_COL_PANELS_3M )
ps_p = ( ps_p * 3 ) / 2;
// Align the panel dimension according to the contiguous memory
// stride alignment size so that the second, third, etc panels begin
// at aligned addresses.
//ps_p = bli_align_dim_to_size( ps_p, elem_size_p,
// BLIS_CONTIG_STRIDE_ALIGN_SIZE );
// Store the strides and panel dimension in p.
bli_obj_set_incs( rs_p, cs_p, *p );
bli_obj_set_panel_dim( n_panel, *p );

View File

@@ -210,19 +210,12 @@
//
// Compute memory pool block sizes for single real.
//
#define BLIS_MK_BLOCK_SIZE_S ( BLIS_POOL_MC_S * \
( BLIS_POOL_KC_S + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_S \
) \
) * \
BLIS_POOL_KC_S * \
BLIS_SIZEOF_S \
)
#define BLIS_KN_BLOCK_SIZE_S ( ( BLIS_POOL_KC_S + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_S \
) \
) * \
#define BLIS_KN_BLOCK_SIZE_S ( BLIS_POOL_KC_S * \
BLIS_POOL_NC_S * \
BLIS_SIZEOF_S \
)
@@ -234,19 +227,12 @@
//
// Compute memory pool block sizes for double real.
//
#define BLIS_MK_BLOCK_SIZE_D ( BLIS_POOL_MC_D * \
( BLIS_POOL_KC_D + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_D \
) \
) * \
BLIS_POOL_KC_D * \
BLIS_SIZEOF_D \
)
#define BLIS_KN_BLOCK_SIZE_D ( ( BLIS_POOL_KC_D + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_D \
) \
) * \
#define BLIS_KN_BLOCK_SIZE_D ( BLIS_POOL_KC_D * \
BLIS_POOL_NC_D * \
BLIS_SIZEOF_D \
)
@@ -258,19 +244,12 @@
//
// Compute memory pool block sizes for single complex.
//
#define BLIS_MK_BLOCK_SIZE_C ( BLIS_POOL_MC_C * \
( BLIS_POOL_KC_C + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_C \
) \
) * \
BLIS_POOL_KC_C * \
BLIS_SIZEOF_C \
)
#define BLIS_KN_BLOCK_SIZE_C ( ( BLIS_POOL_KC_C + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_C \
) \
) * \
#define BLIS_KN_BLOCK_SIZE_C ( BLIS_POOL_KC_C * \
BLIS_POOL_NC_C * \
BLIS_SIZEOF_C \
)
@@ -282,19 +261,12 @@
//
// Compute memory pool block sizes for double complex.
//
#define BLIS_MK_BLOCK_SIZE_Z ( BLIS_POOL_MC_Z * \
( BLIS_POOL_KC_Z + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_Z \
) \
) * \
BLIS_POOL_KC_Z * \
BLIS_SIZEOF_Z \
)
#define BLIS_KN_BLOCK_SIZE_Z ( ( BLIS_POOL_KC_Z + \
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
BLIS_SIZEOF_Z \
) \
) * \
#define BLIS_KN_BLOCK_SIZE_Z ( BLIS_POOL_KC_Z * \
BLIS_POOL_NC_Z * \
BLIS_SIZEOF_Z \
)
@@ -307,9 +279,6 @@
// Compute memory pool block sizes for single complex (4m).
//
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
// kind of alignment is not supported by 4m.
#define BLIS_MK_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_MC_C * \
BLIS_POOL_4M_KC_C * \
BLIS_SIZEOF_C \
@@ -327,9 +296,6 @@
// Compute memory pool block sizes for double complex (4m).
//
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
// kind of alignment is not supported by 4m.
#define BLIS_MK_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_MC_Z * \
BLIS_POOL_4M_KC_Z * \
BLIS_SIZEOF_Z \
@@ -347,8 +313,7 @@
// Compute memory pool block sizes for single complex (3m).
//
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
// kind of alignment is not supported by 3m.
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
#define BLIS_MK_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_MC_C * \
BLIS_POOL_3M_KC_C * \
@@ -373,8 +338,7 @@
// Compute memory pool block sizes for double complex (3m).
//
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
// kind of alignment is not supported by 3m.
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
#define BLIS_MK_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_MC_Z * \
BLIS_POOL_3M_KC_Z * \

View File

@@ -568,7 +568,6 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params )
libblis_test_fprintf_c( os, " # of kc x nc blocks %u\n", BLIS_NUM_KC_X_NC_BLOCKS );
libblis_test_fprintf_c( os, " # of mc x nc blocks %u\n", BLIS_NUM_MC_X_NC_BLOCKS );
libblis_test_fprintf_c( os, " block address alignment %u\n", BLIS_CONTIG_ADDR_ALIGN_SIZE );
libblis_test_fprintf_c( os, " panel stride alignment %u\n", BLIS_CONTIG_STRIDE_ALIGN_SIZE );
libblis_test_fprintf_c( os, " max preload byte offset %u\n", BLIS_MAX_PRELOAD_BYTE_OFFSET );
libblis_test_fprintf_c( os, " actual pool sizes (bytes) \n" );
libblis_test_fprintf_c( os, " for mc x kc blocks of A %u\n", BLIS_MK_POOL_SIZE );

View File

@@ -100,10 +100,6 @@
// from the contiguous memory allocator.
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
// Alignment size used when sizing strides (eg: of packed micro-panels)
// within a block of contiguous memory.
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------