Merge commit 'cfa3db3f' into amd-main

* commit 'cfa3db3f':
  Fixed bug in mixed-dt gemm introduced in e9da642.
  Removed support for 3m, 4m induced methods.
  Updated do_sde.sh to get SDE from GitHub.
  Disable SDE testing of old AMD microarchitectures.
  Fixed substitution bug in configure.
  Allow use of 1m with mixing of row/col-pref ukrs.

AMD-Internal: [CPUPL-2698]
Change-Id: I961f0066243cf26aeb2e174e388b470133cc4a5f
This commit is contained in:
Edward Smyth
2024-07-08 05:55:22 -04:00
180 changed files with 2311 additions and 17801 deletions

View File

@@ -69,7 +69,6 @@ void PASTEMAC2(cntx_init_,archname,BLIS_REF_SUFFIX) \
void PASTEMAC2(cntx_init_,archname,BLIS_IND_SUFFIX) \
( \
ind_t method, \
num_t dt, \
cntx_t* cntx \
);

View File

@@ -1000,50 +1000,6 @@ BLIS_INLINE bool bli_is_panel_packed( pack_t schema )
( schema & BLIS_PACK_PANEL_BIT );
}
// Reports whether the pack-format field of the schema (the bits selected by
// BLIS_PACK_FORMAT_BITS) holds the BLIS_BITVAL_4MI encoding.
BLIS_INLINE bool bli_is_4mi_packed( pack_t schema )
{
	return ( bool )( BLIS_BITVAL_4MI == ( schema & BLIS_PACK_FORMAT_BITS ) );
}
// Reports whether the pack-format field of the schema (the bits selected by
// BLIS_PACK_FORMAT_BITS) holds the BLIS_BITVAL_3MI encoding.
BLIS_INLINE bool bli_is_3mi_packed( pack_t schema )
{
	return ( bool )( BLIS_BITVAL_3MI == ( schema & BLIS_PACK_FORMAT_BITS ) );
}
// Reports whether the pack-format field of the schema (the bits selected by
// BLIS_PACK_FORMAT_BITS) holds the BLIS_BITVAL_3MS encoding.
BLIS_INLINE bool bli_is_3ms_packed( pack_t schema )
{
	return ( bool )( BLIS_BITVAL_3MS == ( schema & BLIS_PACK_FORMAT_BITS ) );
}
// Reports whether the pack-format field of the schema (the bits selected by
// BLIS_PACK_FORMAT_BITS) holds the BLIS_BITVAL_RO encoding.
BLIS_INLINE bool bli_is_ro_packed( pack_t schema )
{
	return ( bool )( BLIS_BITVAL_RO == ( schema & BLIS_PACK_FORMAT_BITS ) );
}
// Reports whether the pack-format field of the schema (the bits selected by
// BLIS_PACK_FORMAT_BITS) holds the BLIS_BITVAL_IO encoding.
BLIS_INLINE bool bli_is_io_packed( pack_t schema )
{
	return ( bool )( BLIS_BITVAL_IO == ( schema & BLIS_PACK_FORMAT_BITS ) );
}
// Reports whether the pack-format field of the schema (the bits selected by
// BLIS_PACK_FORMAT_BITS) holds the BLIS_BITVAL_RPI encoding.
BLIS_INLINE bool bli_is_rpi_packed( pack_t schema )
{
	return ( bool )( BLIS_BITVAL_RPI == ( schema & BLIS_PACK_FORMAT_BITS ) );
}
// Reports whether the schema is packed in any one of the ro, io, or rpi
// formats, as determined by the corresponding single-format predicates.
BLIS_INLINE bool bli_is_rih_packed( pack_t schema )
{
	// Check the formats in the same order as a short-circuit OR would.
	if ( bli_is_ro_packed( schema ) ) return ( bool )1;
	if ( bli_is_io_packed( schema ) ) return ( bool )1;

	return ( bool )bli_is_rpi_packed( schema );
}
BLIS_INLINE bool bli_is_1r_packed( pack_t schema )
{
return ( bool )
@@ -1082,20 +1038,6 @@ BLIS_INLINE guint_t bli_pack_schema_index( pack_t schema )
}
// pointer-related
// Advance a pointer by an integer fraction of one element:
//   p0 + (num/den)
// where p0 points to a datatype occupying sizeof_p0 bytes. The byte offset
// is computed as (num * sizeof_p0) / den, i.e. the multiplication is
// performed before the division.
BLIS_INLINE void_fp bli_ptr_inc_by_frac( void_fp p0, siz_t sizeof_p0, dim_t num, dim_t den )
{
	// Perform the arithmetic on a byte-addressed view of the pointer.
	char* base = ( char* )p0;

	return ( void_fp )( base + ( ( num * ( dim_t )sizeof_p0 ) / den ) );
}
// Set dimensions, increments, effective uplo/diagoff, etc for ONE matrix
// argument.

View File

@@ -206,37 +206,6 @@
#include "bli_set0bbs_mxn.h"
// -- 3m-specific scalar macros --
#include "bli_copyri3s.h"
#include "bli_copyjri3s.h"
#include "bli_scal2ri3s.h"
#include "bli_scal2jri3s.h"
#include "bli_scal2ri3s_mxn.h"
// -- 4mh/3mh-specific scalar macros --
// ro
#include "bli_scal2ros.h"
#include "bli_scal2jros.h"
// io
#include "bli_scal2ios.h"
#include "bli_scal2jios.h"
// rpi
#include "bli_scal2rpis.h"
#include "bli_scal2jrpis.h"
#include "bli_scal2rihs_mxn.h"
#include "bli_scal2rihs_mxn_diag.h"
#include "bli_scal2rihs_mxn_uplo.h"
#include "bli_setrihs_mxn_diag.h"
// -- 1m-specific scalar macros --
// 1e

View File

@@ -258,24 +258,10 @@ typedef void (*free_ft) ( void* p );
- 1 0000 01: packed by columns
- 1 0000 10: packed by row panels
- 1 0000 11: packed by column panels
- 1 0001 10: packed by 4m interleaved row panels
- 1 0001 11: packed by 4m interleaved column panels
- 1 0010 10: packed by 3m interleaved row panels
- 1 0010 11: packed by 3m interleaved column panels
- 1 0011 10: packed by 4m separated row panels (not used)
- 1 0011 11: packed by 4m separated column panels (not used)
- 1 0100 10: packed by 3m separated row panels
- 1 0100 11: packed by 3m separated column panels
- 1 0101 10: packed real-only row panels
- 1 0101 11: packed real-only column panels
- 1 0110 10: packed imag-only row panels
- 1 0110 11: packed imag-only column panels
- 1 0111 10: packed real+imag row panels
- 1 0111 11: packed real+imag column panels
- 1 1000 10: packed by 1m expanded row panels
- 1 1000 11: packed by 1m expanded column panels
- 1 1001 10: packed by 1m reordered row panels
- 1 1001 11: packed by 1m reordered column panels
- 1 0001 10: packed by 1m expanded row panels
- 1 0001 11: packed by 1m expanded column panels
- 1 0010 10: packed by 1m reordered row panels
- 1 0010 11: packed by 1m reordered column panels
23 Packed panel order if upper-stored
- 0 == forward order if upper
- 1 == reverse order if upper
@@ -413,34 +399,13 @@ typedef void (*free_ft) ( void* p );
#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT
#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT
#define BLIS_BITVAL_NOT_PACKED 0x0
#define BLIS_BITVAL_4MI ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_3MI ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_4MS ( 0x3 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_3MS ( 0x4 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_RO ( 0x5 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_IO ( 0x6 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_RPI ( 0x7 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_1E ( 0x8 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_1R ( 0x9 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_1E ( 0x1 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_1R ( 0x2 << BLIS_PACK_FORMAT_SHIFT )
#define BLIS_BITVAL_PACKED_UNSPEC ( BLIS_PACK_BIT )
#define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT )
#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_4MS ( BLIS_PACK_BIT | BLIS_BITVAL_4MS | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_4MS ( BLIS_PACK_BIT | BLIS_BITVAL_4MS | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_3MS ( BLIS_PACK_BIT | BLIS_BITVAL_3MS | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_3MS ( BLIS_PACK_BIT | BLIS_BITVAL_3MS | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_1E ( BLIS_PACK_BIT | BLIS_BITVAL_1E | BLIS_PACK_PANEL_BIT )
#define BLIS_BITVAL_PACKED_COL_PANELS_1E ( BLIS_PACK_BIT | BLIS_BITVAL_1E | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT )
#define BLIS_BITVAL_PACKED_ROW_PANELS_1R ( BLIS_PACK_BIT | BLIS_BITVAL_1R | BLIS_PACK_PANEL_BIT )
@@ -553,20 +518,6 @@ typedef enum
BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS,
BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS,
BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS,
BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI,
BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI,
BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI,
BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI,
BLIS_PACKED_ROW_PANELS_4MS = BLIS_BITVAL_PACKED_ROW_PANELS_4MS,
BLIS_PACKED_COL_PANELS_4MS = BLIS_BITVAL_PACKED_COL_PANELS_4MS,
BLIS_PACKED_ROW_PANELS_3MS = BLIS_BITVAL_PACKED_ROW_PANELS_3MS,
BLIS_PACKED_COL_PANELS_3MS = BLIS_BITVAL_PACKED_COL_PANELS_3MS,
BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO,
BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO,
BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO,
BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO,
BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI,
BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI,
BLIS_PACKED_ROW_PANELS_1E = BLIS_BITVAL_PACKED_ROW_PANELS_1E,
BLIS_PACKED_COL_PANELS_1E = BLIS_BITVAL_PACKED_COL_PANELS_1E,
BLIS_PACKED_ROW_PANELS_1R = BLIS_BITVAL_PACKED_ROW_PANELS_1R,
@@ -574,10 +525,8 @@ typedef enum
} pack_t;
// We combine row and column packing into one "type", and we start
// with BLIS_PACKED_ROW_PANELS, _COLUMN_PANELS. We also count the
// schema pair for "4ms" (4m separated), because its bit value has
// been reserved, even though we don't use it.
#define BLIS_NUM_PACK_SCHEMA_TYPES 10
// with BLIS_PACKED_ROW_PANELS, _COLUMN_PANELS.
#define BLIS_NUM_PACK_SCHEMA_TYPES 3
// -- Pack order type --
@@ -670,12 +619,7 @@ typedef enum
typedef enum
{
BLIS_3MH = 0,
BLIS_3M1,
BLIS_4MH,
BLIS_4M1B,
BLIS_4M1A,
BLIS_1M,
BLIS_1M = 0,
BLIS_NAT,
BLIS_IND_FIRST = 0,
BLIS_IND_LAST = BLIS_NAT
@@ -683,13 +627,8 @@ typedef enum
#define BLIS_NUM_IND_METHODS (BLIS_NAT+1)
// These are used in bli_*_oapi.c to construct the ind_t values from
// These are used in bli_l3_*_oapi.c to construct the ind_t values from
// the induced method substrings that go into function names.
#define bli_3mh BLIS_3MH
#define bli_3m1 BLIS_3M1
#define bli_4mh BLIS_4MH
#define bli_4mb BLIS_4M1B
#define bli_4m1 BLIS_4M1A
#define bli_1m BLIS_1M
#define bli_nat BLIS_NAT
@@ -1255,9 +1194,6 @@ typedef struct
inc_t ps_a;
inc_t ps_b;
// The type to convert to on output.
//num_t dt_on_output;
} auxinfo_t;
@@ -1580,9 +1516,6 @@ typedef struct cntx_s
func_t unpackm_kers[ BLIS_NUM_UNPACKM_KERS ];
ind_t method;
pack_t schema_a_block;
pack_t schema_b_panel;
pack_t schema_c_panel;
} cntx_t;