mirror of
https://github.com/amd/blis.git
synced 2026-03-24 03:07:22 +00:00
Deprecated panel stride alignment in bli_config.h.
Details:
- Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE from bli_config.h of all configurations. It was already going unused in packm_init() since the recent 4m/3m commit. This setting was rarely, if ever, useful, and its existence only posed a potential risk for 4m/3m-based implementations.
- Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE usage from mem_pool_macro_defs.h.
- Updated comments regarding CONTIG_STRIDE_ALIGN_SIZE in template micro-kernels.
This commit is contained in:
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -121,10 +121,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 32
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_CACHE_LINE_SIZE
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -116,10 +116,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -118,10 +118,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
@@ -162,8 +162,7 @@ void bli_dgemm_opt_mxn(
|
||||
that exist (at the edges) is handled automatically within the
|
||||
macro-kernel.
|
||||
- Alignment of a1 and b1. The addresses a1 and b1 are aligned according
|
||||
to the alignment value BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the
|
||||
bli_config.h header file of the BLIS configuration.
|
||||
to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
|
||||
- Unrolling loops. As a general rule of thumb, the loop over k is
|
||||
sometimes moderately unrolled; for example, in our experience, an
|
||||
unrolling factor of u = 4 is fairly common. If unrolling is applied
|
||||
|
||||
@@ -166,16 +166,8 @@ void bli_dgemmtrsm_l_opt_mxn(
|
||||
- Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation
|
||||
Notes for gemm.
|
||||
- Edge cases in MR, NR dimensions. See Implementation Notes for gemm.
|
||||
- Alignment of a1 and b1. Unlike with gemm, the addresses a10/a12 and a11
|
||||
are not guaranteed to be aligned according to the alignment value
|
||||
BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the bli_config.h header
|
||||
file. This is because these micro-panels may vary in size due to the
|
||||
triangular nature of matrix A. Instead, these addresses are aligned
|
||||
to PACKMR x sizeof(type), where type is the datatype in question. To
|
||||
support a somewhat obscure, higher-level optimization, we similarly
|
||||
do not guarantee that b01/b21 and b11 are aligned to
|
||||
BLIS_CONTIG_STRIDE_ALIGN_SIZE; instead, they are only aligned to
|
||||
PACKNR x sizeof(type).
|
||||
- Alignment of a1 and b1. The addresses a1 and b1 are aligned according
|
||||
to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
|
||||
- Unrolling loops. Most optimized implementations should unroll all
|
||||
three loops within the trsm subproblem of gemmtrsm. See Implementation
|
||||
Notes for gemm for remarks on unrolling the gemm subproblem.
|
||||
|
||||
@@ -164,16 +164,8 @@ void bli_dgemmtrsm_u_opt_mxn(
|
||||
- Leading dimensions of a1 and b1: PACKMR and PACKNR. See Implementation
|
||||
Notes for gemm.
|
||||
- Edge cases in MR, NR dimensions. See Implementation Notes for gemm.
|
||||
- Alignment of a1 and b1. Unlike with gemm, the addresses a10/a12 and a11
|
||||
are not guaranteed to be aligned according to the alignment value
|
||||
BLIS_CONTIG_STRIDE_ALIGN_SIZE, as defined in the bli_config.h header
|
||||
file. This is because these micro-panels may vary in size due to the
|
||||
triangular nature of matrix A. Instead, these addresses are aligned
|
||||
to PACKMR x sizeof(type), where type is the datatype in question. To
|
||||
support a somewhat obscure, higher-level optimization, we similarly
|
||||
do not guarantee that b01/b21 and b11 are aligned to
|
||||
BLIS_CONTIG_STRIDE_ALIGN_SIZE; instead, they are only aligned to
|
||||
PACKNR x sizeof(type).
|
||||
- Alignment of a1 and b1. The addresses a1 and b1 are aligned according
|
||||
to PACKMR*sizeof(type) and PACKNR*sizeof(type), respectively.
|
||||
- Unrolling loops. Most optimized implementations should unroll all
|
||||
three loops within the trsm subproblem of gemmtrsm. See Implementation
|
||||
Notes for gemm for remarks on unrolling the gemm subproblem.
|
||||
|
||||
@@ -336,12 +336,6 @@ void bli_packm_init_pack( bool_t densify,
|
||||
if ( pack_schema == BLIS_PACKED_ROW_PANELS_3M )
|
||||
ps_p = ( ps_p * 3 ) / 2;
|
||||
|
||||
// Align the panel dimension according to the contiguous memory
|
||||
// stride alignment size so that the second, third, etc panels begin
|
||||
// at aligned addresses.
|
||||
//ps_p = bli_align_dim_to_size( ps_p, elem_size_p,
|
||||
// BLIS_CONTIG_STRIDE_ALIGN_SIZE );
|
||||
|
||||
// Store the strides and panel dimension in p.
|
||||
bli_obj_set_incs( rs_p, cs_p, *p );
|
||||
bli_obj_set_panel_dim( m_panel, *p );
|
||||
@@ -383,12 +377,6 @@ void bli_packm_init_pack( bool_t densify,
|
||||
if ( pack_schema == BLIS_PACKED_COL_PANELS_3M )
|
||||
ps_p = ( ps_p * 3 ) / 2;
|
||||
|
||||
// Align the panel dimension according to the contiguous memory
|
||||
// stride alignment size so that the second, third, etc panels begin
|
||||
// at aligned addresses.
|
||||
//ps_p = bli_align_dim_to_size( ps_p, elem_size_p,
|
||||
// BLIS_CONTIG_STRIDE_ALIGN_SIZE );
|
||||
|
||||
// Store the strides and panel dimension in p.
|
||||
bli_obj_set_incs( rs_p, cs_p, *p );
|
||||
bli_obj_set_panel_dim( n_panel, *p );
|
||||
|
||||
@@ -210,19 +210,12 @@
|
||||
//
|
||||
// Compute memory pool block sizes for single real.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_S ( BLIS_POOL_MC_S * \
|
||||
( BLIS_POOL_KC_S + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_S \
|
||||
) \
|
||||
) * \
|
||||
BLIS_POOL_KC_S * \
|
||||
BLIS_SIZEOF_S \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_S ( ( BLIS_POOL_KC_S + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_S \
|
||||
) \
|
||||
) * \
|
||||
#define BLIS_KN_BLOCK_SIZE_S ( BLIS_POOL_KC_S * \
|
||||
BLIS_POOL_NC_S * \
|
||||
BLIS_SIZEOF_S \
|
||||
)
|
||||
@@ -234,19 +227,12 @@
|
||||
//
|
||||
// Compute memory pool block sizes for double real.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_D ( BLIS_POOL_MC_D * \
|
||||
( BLIS_POOL_KC_D + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_D \
|
||||
) \
|
||||
) * \
|
||||
BLIS_POOL_KC_D * \
|
||||
BLIS_SIZEOF_D \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_D ( ( BLIS_POOL_KC_D + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_D \
|
||||
) \
|
||||
) * \
|
||||
#define BLIS_KN_BLOCK_SIZE_D ( BLIS_POOL_KC_D * \
|
||||
BLIS_POOL_NC_D * \
|
||||
BLIS_SIZEOF_D \
|
||||
)
|
||||
@@ -258,19 +244,12 @@
|
||||
//
|
||||
// Compute memory pool block sizes for single complex.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_C ( BLIS_POOL_MC_C * \
|
||||
( BLIS_POOL_KC_C + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_C \
|
||||
) \
|
||||
) * \
|
||||
BLIS_POOL_KC_C * \
|
||||
BLIS_SIZEOF_C \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_C ( ( BLIS_POOL_KC_C + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_C \
|
||||
) \
|
||||
) * \
|
||||
#define BLIS_KN_BLOCK_SIZE_C ( BLIS_POOL_KC_C * \
|
||||
BLIS_POOL_NC_C * \
|
||||
BLIS_SIZEOF_C \
|
||||
)
|
||||
@@ -282,19 +261,12 @@
|
||||
//
|
||||
// Compute memory pool block sizes for double complex.
|
||||
//
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_Z ( BLIS_POOL_MC_Z * \
|
||||
( BLIS_POOL_KC_Z + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_Z \
|
||||
) \
|
||||
) * \
|
||||
BLIS_POOL_KC_Z * \
|
||||
BLIS_SIZEOF_Z \
|
||||
)
|
||||
#define BLIS_KN_BLOCK_SIZE_Z ( ( BLIS_POOL_KC_Z + \
|
||||
( BLIS_CONTIG_STRIDE_ALIGN_SIZE / \
|
||||
BLIS_SIZEOF_Z \
|
||||
) \
|
||||
) * \
|
||||
#define BLIS_KN_BLOCK_SIZE_Z ( BLIS_POOL_KC_Z * \
|
||||
BLIS_POOL_NC_Z * \
|
||||
BLIS_SIZEOF_Z \
|
||||
)
|
||||
@@ -307,9 +279,6 @@
|
||||
// Compute memory pool block sizes for single complex (4m).
|
||||
//
|
||||
|
||||
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
|
||||
// of alignment is not supported by 4m.
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_MC_C * \
|
||||
BLIS_POOL_4M_KC_C * \
|
||||
BLIS_SIZEOF_C \
|
||||
@@ -327,9 +296,6 @@
|
||||
// Compute memory pool block sizes for double complex (4m).
|
||||
//
|
||||
|
||||
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
|
||||
// of alignment is not supported by 4m.
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_MC_Z * \
|
||||
BLIS_POOL_4M_KC_Z * \
|
||||
BLIS_SIZEOF_Z \
|
||||
@@ -347,8 +313,7 @@
|
||||
// Compute memory pool block sizes for single complex (3m).
|
||||
//
|
||||
|
||||
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
|
||||
// of alignment is not supported by 3m.
|
||||
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_MC_C * \
|
||||
BLIS_POOL_3M_KC_C * \
|
||||
@@ -373,8 +338,7 @@
|
||||
// Compute memory pool block sizes for double complex (3m).
|
||||
//
|
||||
|
||||
// NOTE: We don't align by BLIS_CONTIG_STRIDE_ALIGN_SIZE here because that
|
||||
// of alignment is not supported by 3m.
|
||||
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
|
||||
|
||||
#define BLIS_MK_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_MC_Z * \
|
||||
BLIS_POOL_3M_KC_Z * \
|
||||
|
||||
@@ -568,7 +568,6 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params )
|
||||
libblis_test_fprintf_c( os, " # of kc x nc blocks %u\n", BLIS_NUM_KC_X_NC_BLOCKS );
|
||||
libblis_test_fprintf_c( os, " # of mc x nc blocks %u\n", BLIS_NUM_MC_X_NC_BLOCKS );
|
||||
libblis_test_fprintf_c( os, " block address alignment %u\n", BLIS_CONTIG_ADDR_ALIGN_SIZE );
|
||||
libblis_test_fprintf_c( os, " panel stride alignment %u\n", BLIS_CONTIG_STRIDE_ALIGN_SIZE );
|
||||
libblis_test_fprintf_c( os, " max preload byte offset %u\n", BLIS_MAX_PRELOAD_BYTE_OFFSET );
|
||||
libblis_test_fprintf_c( os, " actual pool sizes (bytes) \n" );
|
||||
libblis_test_fprintf_c( os, " for mc x kc blocks of A %u\n", BLIS_MK_POOL_SIZE );
|
||||
|
||||
@@ -100,10 +100,6 @@
|
||||
// from the contiguous memory allocator.
|
||||
#define BLIS_CONTIG_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE
|
||||
|
||||
// Alignment size used when sizing strides (eg: of packed micro-panels)
|
||||
// within a block of contiguous memory.
|
||||
#define BLIS_CONTIG_STRIDE_ALIGN_SIZE 16
|
||||
|
||||
|
||||
|
||||
// -- MIXED DATATYPE SUPPORT ---------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user