mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Combined 4m/3m bits into an expanded bitfield.
Details:
- Combined the 4m/3m bits into an expanded bitfield, which will encode the packing "format" of the micro-panels. This will allow additional formats to be encoded more easily and compactly.
- Other minor comment/whitespace updates to bli_type_defs.h.
- Updated bli_obj_macro_defs.h and bli_param_macro_defs.h to use the new format bitfield.
- Comment update to bli_kernel_post_macro_defs.h.
- Whitespace changes to bli_kernel_3m_macro_defs.h and bli_kernel_4m_macro_defs.h.
This commit is contained in:
@@ -49,7 +49,7 @@
|
||||
// -- Define default 3m-specific kernel names ----------------------------------
|
||||
|
||||
//
|
||||
// Level-3 3m
|
||||
// Level-3
|
||||
//
|
||||
|
||||
// gemm3m micro-kernels
|
||||
@@ -216,44 +216,44 @@
|
||||
|
||||
// 3m cache blocksizes
|
||||
#ifndef BLIS_DEFAULT_3M_MC_C
|
||||
#define BLIS_DEFAULT_3M_MC_C ((BLIS_DEFAULT_MC_S)/1)
|
||||
#define BLIS_DEFAULT_3M_MC_C BLIS_DEFAULT_MC_S
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_3M_KC_C
|
||||
#define BLIS_DEFAULT_3M_KC_C ((BLIS_DEFAULT_KC_S)/2)
|
||||
#define BLIS_DEFAULT_3M_KC_C ((BLIS_DEFAULT_KC_S)/2)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_3M_NC_C
|
||||
#define BLIS_DEFAULT_3M_NC_C ((BLIS_DEFAULT_NC_S)/1)
|
||||
#define BLIS_DEFAULT_3M_NC_C BLIS_DEFAULT_NC_S
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DEFAULT_3M_MC_Z
|
||||
#define BLIS_DEFAULT_3M_MC_Z ((BLIS_DEFAULT_MC_D)/1)
|
||||
#define BLIS_DEFAULT_3M_MC_Z BLIS_DEFAULT_MC_D
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_3M_KC_Z
|
||||
#define BLIS_DEFAULT_3M_KC_Z ((BLIS_DEFAULT_KC_D)/2)
|
||||
#define BLIS_DEFAULT_3M_KC_Z ((BLIS_DEFAULT_KC_D)/2)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_3M_NC_Z
|
||||
#define BLIS_DEFAULT_3M_NC_Z ((BLIS_DEFAULT_NC_D)/1)
|
||||
#define BLIS_DEFAULT_3M_NC_Z BLIS_DEFAULT_NC_D
|
||||
#endif
|
||||
|
||||
// 3m cache blocksize extensions
|
||||
#ifndef BLIS_EXTEND_3M_MC_C
|
||||
#define BLIS_EXTEND_3M_MC_C 0
|
||||
#define BLIS_EXTEND_3M_MC_C BLIS_EXTEND_MC_S
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_3M_KC_C
|
||||
#define BLIS_EXTEND_3M_KC_C 0
|
||||
#define BLIS_EXTEND_3M_KC_C ((BLIS_EXTEND_KC_S)/2)
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_3M_NC_C
|
||||
#define BLIS_EXTEND_3M_NC_C 0
|
||||
#define BLIS_EXTEND_3M_NC_C BLIS_EXTEND_NC_S
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_EXTEND_3M_MC_Z
|
||||
#define BLIS_EXTEND_3M_MC_Z 0
|
||||
#define BLIS_EXTEND_3M_MC_Z BLIS_EXTEND_MC_D
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_3M_KC_Z
|
||||
#define BLIS_EXTEND_3M_KC_Z 0
|
||||
#define BLIS_EXTEND_3M_KC_Z ((BLIS_EXTEND_KC_D)/2)
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_3M_NC_Z
|
||||
#define BLIS_EXTEND_3M_NC_Z 0
|
||||
#define BLIS_EXTEND_3M_NC_Z BLIS_EXTEND_NC_D
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
// -- Define default 4m-specific kernel names ----------------------------------
|
||||
|
||||
//
|
||||
// Level-3 4m
|
||||
// Level-3
|
||||
//
|
||||
|
||||
// gemm4m micro-kernels
|
||||
@@ -216,44 +216,44 @@
|
||||
|
||||
// 4m cache blocksizes
|
||||
#ifndef BLIS_DEFAULT_4M_MC_C
|
||||
#define BLIS_DEFAULT_4M_MC_C ((BLIS_DEFAULT_MC_S)/1)
|
||||
#define BLIS_DEFAULT_4M_MC_C BLIS_DEFAULT_MC_S
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_4M_KC_C
|
||||
#define BLIS_DEFAULT_4M_KC_C ((BLIS_DEFAULT_KC_S)/2)
|
||||
#define BLIS_DEFAULT_4M_KC_C ((BLIS_DEFAULT_KC_S)/2)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_4M_NC_C
|
||||
#define BLIS_DEFAULT_4M_NC_C ((BLIS_DEFAULT_NC_S)/1)
|
||||
#define BLIS_DEFAULT_4M_NC_C BLIS_DEFAULT_NC_S
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_DEFAULT_4M_MC_Z
|
||||
#define BLIS_DEFAULT_4M_MC_Z ((BLIS_DEFAULT_MC_D)/1)
|
||||
#define BLIS_DEFAULT_4M_MC_Z BLIS_DEFAULT_MC_D
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_4M_KC_Z
|
||||
#define BLIS_DEFAULT_4M_KC_Z ((BLIS_DEFAULT_KC_D)/2)
|
||||
#define BLIS_DEFAULT_4M_KC_Z ((BLIS_DEFAULT_KC_D)/2)
|
||||
#endif
|
||||
#ifndef BLIS_DEFAULT_4M_NC_Z
|
||||
#define BLIS_DEFAULT_4M_NC_Z ((BLIS_DEFAULT_NC_D)/1)
|
||||
#define BLIS_DEFAULT_4M_NC_Z BLIS_DEFAULT_NC_D
|
||||
#endif
|
||||
|
||||
// 4m cache blocksize extensions
|
||||
#ifndef BLIS_EXTEND_4M_MC_C
|
||||
#define BLIS_EXTEND_4M_MC_C 0
|
||||
#define BLIS_EXTEND_4M_MC_C BLIS_EXTEND_MC_S
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_4M_KC_C
|
||||
#define BLIS_EXTEND_4M_KC_C 0
|
||||
#define BLIS_EXTEND_4M_KC_C ((BLIS_EXTEND_KC_S)/2)
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_4M_NC_C
|
||||
#define BLIS_EXTEND_4M_NC_C 0
|
||||
#define BLIS_EXTEND_4M_NC_C BLIS_EXTEND_NC_S
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_EXTEND_4M_MC_Z
|
||||
#define BLIS_EXTEND_4M_MC_Z 0
|
||||
#define BLIS_EXTEND_4M_MC_Z BLIS_EXTEND_MC_D
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_4M_KC_Z
|
||||
#define BLIS_EXTEND_4M_KC_Z 0
|
||||
#define BLIS_EXTEND_4M_KC_Z ((BLIS_EXTEND_KC_D)/2)
|
||||
#endif
|
||||
#ifndef BLIS_EXTEND_4M_NC_Z
|
||||
#define BLIS_EXTEND_4M_NC_Z 0
|
||||
#define BLIS_EXTEND_4M_NC_Z BLIS_EXTEND_NC_D
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -269,6 +269,21 @@
|
||||
|
||||
// -- Maximum register blocksize search ----------------------------------------
|
||||
|
||||
// The macro-kernels oftentimes need to statically allocate a temporary
|
||||
// MR x NR micro-tile of C. This micro-tile must be sized such that it will
|
||||
// work for both native and 4m/3m implementations, since the user can switch
|
||||
// between them at runtime. In order to facilitate the sizing of those
|
||||
// micro-tiles, we must determine how large the register blocksizes would
|
||||
// need to be to accommodate both native and 4m/3m-based complex
|
||||
// micro-kernels. For real datatypes, the maximum is never larger than the
|
||||
// actual s and d register blocksizes. However, for complex datatypes, the
|
||||
// "native" register blocksizes may differ from the "virtual" register
|
||||
// blocksizes used by the 4m/3m implementations. Usually, it is the register
|
||||
// blocksizes used for 4m/3m-based complex micro-kernels that would be
|
||||
// larger, and thus determine the maximum for c and z datatypes. But, we
|
||||
// prefer not to assume this, therefore, we always take the larger of the
|
||||
// two values.
|
||||
|
||||
//
|
||||
// Find the largest register blocksize MR.
|
||||
//
|
||||
|
||||
@@ -215,11 +215,11 @@
|
||||
|
||||
#define bli_obj_is_4m_packed( obj ) \
|
||||
\
|
||||
( ( (obj).info & BLIS_PACK_4M_BIT ) )
|
||||
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4M )
|
||||
|
||||
#define bli_obj_is_3m_packed( obj ) \
|
||||
\
|
||||
( ( (obj).info & BLIS_PACK_3M_BIT ) )
|
||||
( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3M )
|
||||
|
||||
#define bli_obj_pack_buffer_type( obj ) \
|
||||
\
|
||||
|
||||
@@ -524,11 +524,11 @@
|
||||
|
||||
#define bli_is_4m_packed( schema ) \
|
||||
\
|
||||
( ( schema & BLIS_PACK_4M_BIT ) )
|
||||
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4M )
|
||||
|
||||
#define bli_is_3m_packed( schema ) \
|
||||
\
|
||||
( ( schema & BLIS_PACK_3M_BIT ) )
|
||||
( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3M )
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -214,32 +214,32 @@ typedef struct
|
||||
- 11: precision (0 == single, 1 == double)
|
||||
- 12: unused
|
||||
15 ~ 13 Execution numerical datatype
|
||||
- 13 domain (0 == real, 1 == complex)
|
||||
- 13: domain (0 == real, 1 == complex)
|
||||
- 14: precision (0 == single, 1 == double)
|
||||
- 15: unused
|
||||
20 ~ 16 Packed type/status
|
||||
- 00000 0 == not packed
|
||||
- 10000 1 == packed (unspecified; row, column, or vector)
|
||||
- 10000 2 == packed by rows
|
||||
- 10001 3 == packed by columns
|
||||
- 10010 4 == packed by row panels
|
||||
- 10011 5 == packed by column panels
|
||||
- 10100 6 == packed by row panels (4m)
|
||||
- 10101 7 == packed by column panels (4m)
|
||||
- 11000 8 == packed by row panels (3m)
|
||||
- 11001 9 == packed by column panels (3m)
|
||||
21 Packed panel order if upper-stored
|
||||
21 ~ 16 Packed type/status
|
||||
- 000000: not packed
|
||||
- 100000: packed (unspecified; by rows, columns, or vector)
|
||||
- 100000: packed by rows
|
||||
- 100001: packed by columns
|
||||
- 100010: packed by row panels
|
||||
- 100011: packed by column panels
|
||||
- 100110: packed by 4m row panels
|
||||
- 100111: packed by 4m column panels
|
||||
- 101010: packed by 3m row panels
|
||||
- 101011: packed by 3m column panels
|
||||
22 Packed panel order if upper-stored
|
||||
- 0 == forward order if upper
|
||||
- 1 == reverse order if upper
|
||||
22 Packed panel order if lower-stored
|
||||
23 Packed panel order if lower-stored
|
||||
- 0 == forward order if lower
|
||||
- 1 == reverse order if lower
|
||||
24 ~ 23 Packed buffer type
|
||||
25 ~ 24 Packed buffer type
|
||||
- 0 == block of A
|
||||
- 1 == panel of B
|
||||
- 2 == panel of C
|
||||
- 3 == general use
|
||||
26 ~ 25 Structure type
|
||||
27 ~ 26 Structure type
|
||||
- 0 == general
|
||||
- 1 == Hermitian
|
||||
- 2 == symmetric
|
||||
@@ -263,13 +263,12 @@ typedef struct
|
||||
#define BLIS_PACK_SCHEMA_SHIFT 16
|
||||
#define BLIS_PACK_RC_SHIFT 16
|
||||
#define BLIS_PACK_PANEL_SHIFT 17
|
||||
#define BLIS_PACK_4M_SHIFT 18
|
||||
#define BLIS_PACK_3M_SHIFT 19
|
||||
#define BLIS_PACK_SHIFT 20
|
||||
#define BLIS_PACK_REV_IF_UPPER_SHIFT 21
|
||||
#define BLIS_PACK_REV_IF_LOWER_SHIFT 22
|
||||
#define BLIS_PACK_BUFFER_SHIFT 23
|
||||
#define BLIS_STRUC_SHIFT 25
|
||||
#define BLIS_PACK_FORMAT_SHIFT 18
|
||||
#define BLIS_PACK_SHIFT 21
|
||||
#define BLIS_PACK_REV_IF_UPPER_SHIFT 22
|
||||
#define BLIS_PACK_REV_IF_LOWER_SHIFT 23
|
||||
#define BLIS_PACK_BUFFER_SHIFT 24
|
||||
#define BLIS_STRUC_SHIFT 26
|
||||
|
||||
//
|
||||
// -- BLIS info bit field masks ------------------------------------------------
|
||||
@@ -292,8 +291,7 @@ typedef struct
|
||||
// Mask for the whole pack schema field (bits 21 ~ 16): the row/column and
// panel bits, the three format bits, and the pack bit. The field is six bits
// wide, so the mask is 0x3F; a five-bit mask (0x1F) would drop the pack bit.
#define BLIS_PACK_SCHEMA_BITS ( 0x3F << BLIS_PACK_SCHEMA_SHIFT )
|
||||
#define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT )
|
||||
#define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT )
|
||||
#define BLIS_PACK_4M_BIT ( 0x1 << BLIS_PACK_4M_SHIFT )
|
||||
#define BLIS_PACK_3M_BIT ( 0x1 << BLIS_PACK_3M_SHIFT )
|
||||
#define BLIS_PACK_FORMAT_BITS ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_PACK_BIT ( 0x1 << BLIS_PACK_SHIFT )
|
||||
#define BLIS_PACK_REV_IF_UPPER_BIT ( 0x1 << BLIS_PACK_REV_IF_UPPER_SHIFT )
|
||||
#define BLIS_PACK_REV_IF_LOWER_BIT ( 0x1 << BLIS_PACK_REV_IF_LOWER_SHIFT )
|
||||
@@ -328,15 +326,17 @@ typedef struct
|
||||
#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT
|
||||
#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT
|
||||
#define BLIS_BITVAL_NOT_PACKED 0x0
|
||||
#define BLIS_BITVAL_4M ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_3M ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
|
||||
#define BLIS_BITVAL_PACKED_UNSPEC BLIS_PACK_BIT
|
||||
#define BLIS_BITVAL_PACKED_ROWS BLIS_PACK_BIT
|
||||
#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_4M ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_4M_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_4M ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_4M_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_3M ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_3M_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_3M ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_3M_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_4M ( BLIS_PACK_BIT | BLIS_BITVAL_4M | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_4M ( BLIS_PACK_BIT | BLIS_BITVAL_4M | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACKED_ROW_PANELS_3M ( BLIS_PACK_BIT | BLIS_BITVAL_3M | BLIS_PACK_PANEL_BIT )
|
||||
#define BLIS_BITVAL_PACKED_COL_PANELS_3M ( BLIS_PACK_BIT | BLIS_BITVAL_3M | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
|
||||
#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0
|
||||
#define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT
|
||||
#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0
|
||||
|
||||
Reference in New Issue
Block a user